In [19]:
import matplotlib.pyplot as plt
import pandas as pd
import requests
import seaborn as sns
import json
from tqdm import tqdm

%matplotlib inline

info_df = pd.read_csv("carpark_info.csv")
info_df.head()

Unnamed: 0,carpark_number,address,x_coord,y_coord,car_park_type,type_of_parking_system,short_term_parking,free_parking,night_parking,longitude,latitude
0,ACB,BLK 270/271 ALBERT CENTRE BASEMENT CAR PARK,30314.7936,31490.4942,BASEMENT CAR PARK,ELECTRONIC PARKING,WHOLE DAY,NO,YES,103.854118,1.301063
1,ACM,BLK 98A ALJUNIED CRESCENT,33758.4143,33695.5198,MULTI-STOREY CAR PARK,ELECTRONIC PARKING,WHOLE DAY,SUN & PH FR 7AM-10.30PM,YES,103.885061,1.321004
2,AH1,BLK 101 JALAN DUSUN,29257.7203,34500.3599,SURFACE CAR PARK,ELECTRONIC PARKING,WHOLE DAY,SUN & PH FR 7AM-10.30PM,YES,103.84462,1.328283
3,AK19,BLOCK 253 ANG MO KIO STREET 21,28185.4359,39012.6664,SURFACE CAR PARK,COUPON PARKING,7AM-7PM,NO,NO,103.834985,1.369091
4,AK31,BLK 302/348 ANG MO KIO ST 31,29482.029,38684.1754,SURFACE CAR PARK,COUPON PARKING,NO,NO,NO,103.846636,1.36612


## Specify Dates

In [127]:
start_date = '2018-01-01'
end_date = '2018-01-31'

date_list =pd.date_range(start_date, end_date, freq='1D')
date_list

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08',
               '2018-01-09', '2018-01-10', '2018-01-11', '2018-01-12',
               '2018-01-13', '2018-01-14', '2018-01-15', '2018-01-16',
               '2018-01-17', '2018-01-18', '2018-01-19', '2018-01-20',
               '2018-01-21', '2018-01-22', '2018-01-23', '2018-01-24',
               '2018-01-25', '2018-01-26', '2018-01-27', '2018-01-28',
               '2018-01-29', '2018-01-30', '2018-01-31'],
              dtype='datetime64[ns]', freq='D')

## Define function to extract number from string

In [8]:
def extract_number(x):
    str = x
    return [int(s) for s in str.split('\"') if s.isdigit()][0]

## Define function to extract single character from string

In [9]:
def extract_alpha(x):
    str = x
    return [s for s in str.split('\"') if s.isalpha()][0]

In [128]:
def carpark_data(date):
    # Get carpark availability at 30-min intervals on a specific date
    start_datetime = date.strftime("%Y-%m-%d") + ' 07:05:00'
    end_datetime = date.strftime("%Y-%m-%d") + ' 09:35:00'


    carpark_df = pd.DataFrame(columns=['lot_type', 'total_lots', 'lots_available', 'timestamp (SGT)'])
    for dt in pd.date_range(start_datetime, end_datetime, freq='30min'):  # I remember this was a wet day
        r = requests.get('https://api.data.gov.sg/v1/transport/carpark-availability',
                         params={'date_time': dt.strftime('%Y-%m-%dT%H:%M:%S')})
        try:
            temp_df = pd.DataFrame(r.json()['items'][0]['carpark_data'])
        except ValueError:
            print('No data JSONDecodeError {}'.format(dt))
            continue
        except KeyError:
            print('No items KeyError {}'.format(dt))
            continue
        #temp_df = temp_df.rename(columns={'lots_available': 'lots_available'})
    
        #temp_df['timestamp (SGT)'] = pd.to_datetime(r.json()['items'][0]['timestamp'].split('+')[0])
        temp_df['timestamp (SGT)'] = dt
        carpark_df = carpark_df.append(temp_df, ignore_index=True)
    carpark_df.head()
    
    carpark_df['lots_available']=''
    df3 = pd.DataFrame(carpark_df['carpark_info'].values.tolist(), columns=['carpark_info1','carpark_info2','carpark_info3'])
    df3['carpark_info1'] = df3['carpark_info1'].apply(json.dumps)
    temp_df1, temp_df2, temp_df3 = df3['carpark_info1'].str.split(',').str
    
    temp_df1.apply(extract_number).head()
    temp_df2.apply(extract_alpha).head()
    temp_df3.apply(extract_number).head()

    carpark_df['lot_type'] = temp_df2.apply(extract_alpha)
    carpark_df['total_lots'] = temp_df1.apply(extract_number)
    carpark_df['lots_available'] = temp_df3.apply(extract_number)
    
    #drop lot type other than tpye 'C'
    carpark_df = carpark_df.groupby(['lot_type']).get_group('C')
    
    # calculate change in lots available
    grouped = carpark_df.groupby(['carpark_number'], axis=0)
    temp2_df = pd.DataFrame(grouped['lots_available'].rolling(window=2).apply(lambda x: x[0] - x[1]))
    temp2_df = temp2_df.reset_index()
    change_df = temp2_df.groupby(['carpark_number'], axis=0).mean().filter(items=['carpark_number','lots_available'])
    change_df = change_df.rename(columns={'lots_available': 'change'})
    change_df = change_df.reset_index()
    
    #print(carpark_df)

    # calculate the means
    carparkmean_df = carpark_df.groupby('carpark_number').mean() # some columns missing (non numerical columns) after this operation
    carparkmean_df['date'] = date
    carparkmean_df = carparkmean_df.reset_index()
    
    carpark_outer_df = carparkmean_df.filter(items=['carpark_number','date','lots_available','total_lots'])
    
    outer_df = pd.merge(carpark_outer_df, change_df, how='inner', on=['carpark_number'])
    
    #return carpark_df
    #return carpark_outer_df
    return outer_df

## Main Program

In [129]:
carparkmean_df = pd.DataFrame()
for date in tqdm(date_list):
    temp_df = carpark_data(date)
    carparkmean_df = carparkmean_df.append(temp_df, ignore_index=True)

carparkmean_df

# Union of wind-speed and station data
outer_df = pd.merge(carparkmean_df, info_df, how='inner', on=['carpark_number'])
#outer_df.head(15)

filename = 'carparkmean_{}to{}.csv'.format(start_date, end_date)
filename
outer_df.to_csv(filename, index = False)

 35%|███▌      | 11/31 [01:18<02:22,  7.10s/it]

No items KeyError 2018-01-12 08:05:00


 65%|██████▍   | 20/31 [02:43<01:30,  8.20s/it]

No items KeyError 2018-01-21 09:35:00


 68%|██████▊   | 21/31 [03:14<01:32,  9.26s/it]

No data JSONDecodeError 2018-01-22 08:35:00


 77%|███████▋  | 24/31 [04:30<01:18, 11.28s/it]

No data JSONDecodeError 2018-01-25 09:05:00


 81%|████████  | 25/31 [05:05<01:13, 12.23s/it]

No items KeyError 2018-01-26 08:05:00
No data JSONDecodeError 2018-01-26 09:35:00


 84%|████████▍ | 26/31 [06:27<01:14, 14.91s/it]

No data JSONDecodeError 2018-01-27 09:05:00


100%|██████████| 31/31 [07:46<00:00, 15.06s/it]


In [121]:
outer_df

Unnamed: 0,carpark_number,date,lots_available,total_lots,change,address,x_coord,y_coord,car_park_type,type_of_parking_system,short_term_parking,free_parking,night_parking,longitude,latitude
0,A10,2018-01-01,12.666667,62.0,0.0,BLK 201/202 ANG MO KIO STREET 22,29247.0300,38962.0000,SURFACE CAR PARK,ELECTRONIC PARKING,WHOLE DAY,SUN & PH FR 7AM-10.30PM,YES,103.844524,1.368633
1,A100,2018-01-01,10.500000,67.0,-0.6,BLK 650 ANG MO KIO STREET 61,29033.1895,39996.3712,SURFACE CAR PARK,ELECTRONIC PARKING,7AM-10.30PM,NO,NO,103.842603,1.377987
2,A11,2018-01-01,148.833333,370.0,8.2,BLK 223/226/226A-226D ANG MO KIO ST 22,28541.1400,38785.3600,SURFACE CAR PARK,ELECTRONIC PARKING,WHOLE DAY,SUN & PH FR 7AM-10.30PM,YES,103.838181,1.367036
3,A12,2018-01-01,148.833333,384.0,2.0,BLK 229/230 ANG MO KIO ST 22,28596.1400,38948.3800,SURFACE CAR PARK,ELECTRONIC PARKING,7AM-10.30PM,SUN & PH FR 7AM-10.30PM,NO,103.838675,1.368510
4,A15,2018-01-01,12.833333,57.0,3.2,BLK 226E-226H ANG MO KIO ST 22,28790.0200,38777.8500,SURFACE CAR PARK,ELECTRONIC PARKING,WHOLE DAY,SUN & PH FR 7AM-10.30PM,YES,103.840417,1.366968
5,A2,2018-01-01,98.500000,187.0,-2.4,BLK 206/207 ANG MO KIO STREET 22,29082.5900,38697.0700,SURFACE CAR PARK,ELECTRONIC PARKING,WHOLE DAY,SUN & PH FR 7AM-10.30PM,YES,103.843046,1.366237
6,A20,2018-01-01,58.166667,250.0,4.8,BLK 304/307/319 ANG MO KIO ST 31,29684.7366,38554.0251,SURFACE CAR PARK,ELECTRONIC PARKING,WHOLE DAY,SUN & PH FR 7AM-10.30PM,YES,103.848457,1.364943
7,A24,2018-01-01,80.833333,230.0,8.6,BLK 338/340 ANG MO KIO ST 32,29789.4100,38533.0200,SURFACE CAR PARK,ELECTRONIC PARKING,WHOLE DAY,SUN & PH FR 7AM-10.30PM,YES,103.849398,1.364753
8,A25,2018-01-01,129.833333,514.0,-5.0,BLK 330/337 ANG MO KIO AVE 8,30037.5660,38313.6296,SURFACE CAR PARK,ELECTRONIC PARKING,WHOLE DAY,SUN & PH FR 7AM-10.30PM,YES,103.851627,1.362769
9,A27,2018-01-01,74.500000,251.0,-1.4,BLK 108/109/110 ANG MO KIO ST 11,28556.6700,39153.3000,SURFACE CAR PARK,ELECTRONIC PARKING,7AM-10.30PM,SUN & PH FR 7AM-10.30PM,NO,103.838321,1.370363


## Debug - backup

In [12]:
date = '2018-01-01'
temp_df = carpark_data(date)

AttributeError: 'str' object has no attribute 'strftime'

In [55]:
carparkmean_df = pd.DataFrame()
for date in tqdm(date_list):
    temp_df = carpark_data(date)
    carparkmean_df = carparkmean_df.append(temp_df, ignore_index=True)

carparkmean_df


100%|██████████| 1/1 [00:06<00:00,  6.50s/it]


Unnamed: 0,carpark_info,carpark_number,lot_type,lots_available,timestamp (SGT),total_lots,update_datetime
0,"[{'total_lots': '104', 'lot_type': 'C', 'lots_...",HE12,C,0,2018-01-01 07:05:00,104,2018-01-01T07:03:13
1,"[{'total_lots': '583', 'lot_type': 'C', 'lots_...",HLM,C,529,2018-01-01 07:05:00,583,2018-01-01T07:03:02
2,"[{'total_lots': '329', 'lot_type': 'C', 'lots_...",RHM,C,94,2018-01-01 07:05:00,329,2018-01-01T07:03:03
3,"[{'total_lots': '110', 'lot_type': 'C', 'lots_...",BM29,C,109,2018-01-01 07:05:00,110,2017-08-01T12:15:17
4,"[{'total_lots': '97', 'lot_type': 'C', 'lots_a...",Q81,C,71,2018-01-01 07:05:00,97,2018-01-01T07:03:12
5,"[{'total_lots': '183', 'lot_type': 'C', 'lots_...",C20,C,103,2018-01-01 07:05:00,183,2018-01-01T07:02:53
6,"[{'total_lots': '288', 'lot_type': 'C', 'lots_...",FR3M,C,218,2018-01-01 07:05:00,288,2018-01-01T07:03:01
7,"[{'total_lots': '280', 'lot_type': 'C', 'lots_...",C32,C,0,2018-01-01 07:05:00,280,2018-01-01T07:03:05
8,"[{'total_lots': '337', 'lot_type': 'C', 'lots_...",C6,C,94,2018-01-01 07:05:00,337,2018-01-01T07:02:45
9,"[{'total_lots': '277', 'lot_type': 'C', 'lots_...",TG2,C,0,2018-01-01 07:05:00,277,2018-01-01T07:03:04


In [60]:
grouped = carpark_df.groupby(['carpark_number'], axis=0)
cat_types = grouped.size().index
cat_types

Index(['A10', 'A100', 'A11', 'A12', 'A15', 'A2', 'A20', 'A24', 'A25', 'A27',
       ...
       'Y6', 'Y60M', 'Y61M', 'Y62M', 'Y68L', 'Y68M', 'Y7', 'Y8', 'Y9', 'YHS'],
      dtype='object', name='carpark_number', length=1665)

In [75]:
processed_df = pd.DataFrame()
for i in tqdm(cat_types):
    temp1_df = grouped.get_group(i)
    temp1_df['change'] = temp1_df.shift(1)['lots_available']-temp1_df['lots_available']
    processed_df = processed_df.append(temp1_df, ignore_index=True)
processed_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
100%|██████████| 1665/1665 [01:37<00:00, 17.07it/s]


Unnamed: 0,carpark_info,carpark_number,lot_type,lots_available,timestamp (SGT),total_lots,update_datetime,change
0,"[{'total_lots': '62', 'lot_type': 'C', 'lots_a...",A10,C,13,2018-01-01 07:05:00,62,2018-01-01T07:08:26,
1,"[{'total_lots': '62', 'lot_type': 'C', 'lots_a...",A10,C,12,2018-01-01 07:35:00,62,2018-01-01T07:38:27,1.0
2,"[{'total_lots': '62', 'lot_type': 'C', 'lots_a...",A10,C,13,2018-01-01 08:05:00,62,2018-01-01T08:08:28,-1.0
3,"[{'total_lots': '62', 'lot_type': 'C', 'lots_a...",A10,C,13,2018-01-01 08:35:00,62,2018-01-01T08:38:28,0.0
4,"[{'total_lots': '62', 'lot_type': 'C', 'lots_a...",A10,C,12,2018-01-01 09:05:00,62,2018-01-01T09:08:29,1.0
5,"[{'total_lots': '62', 'lot_type': 'C', 'lots_a...",A10,C,13,2018-01-01 09:35:00,62,2018-01-01T09:36:30,-1.0
6,"[{'total_lots': '67', 'lot_type': 'C', 'lots_a...",A100,C,10,2018-01-01 07:05:00,67,2018-01-01T07:08:37,
7,"[{'total_lots': '67', 'lot_type': 'C', 'lots_a...",A100,C,9,2018-01-01 07:35:00,67,2018-01-01T07:38:38,1.0
8,"[{'total_lots': '67', 'lot_type': 'C', 'lots_a...",A100,C,10,2018-01-01 08:05:00,67,2018-01-01T08:08:39,-1.0
9,"[{'total_lots': '67', 'lot_type': 'C', 'lots_a...",A100,C,10,2018-01-01 08:35:00,67,2018-01-01T08:38:40,0.0


In [87]:
grouped = carpark_df.groupby(['carpark_number'], axis=0)

In [91]:
grouped
list(grouped)

[('A10',
                                             carpark_info carpark_number  \
  1249  [{'total_lots': '62', 'lot_type': 'C', 'lots_a...            A10   
  2916  [{'total_lots': '62', 'lot_type': 'C', 'lots_a...            A10   
  4583  [{'total_lots': '62', 'lot_type': 'C', 'lots_a...            A10   
  6250  [{'total_lots': '62', 'lot_type': 'C', 'lots_a...            A10   
  7917  [{'total_lots': '62', 'lot_type': 'C', 'lots_a...            A10   
  9584  [{'total_lots': '62', 'lot_type': 'C', 'lots_a...            A10   
  
       lot_type  lots_available     timestamp (SGT)  total_lots  \
  1249        C              13 2018-01-01 07:05:00          62   
  2916        C              12 2018-01-01 07:35:00          62   
  4583        C              13 2018-01-01 08:05:00          62   
  6250        C              13 2018-01-01 08:35:00          62   
  7917        C              12 2018-01-01 09:05:00          62   
  9584        C              13 2018-01-01 09:35:00   

In [100]:
temp2_df = pd.DataFrame(grouped['lots_available'].rolling(window=2).apply(lambda x: x[0] - x[1]))

In [109]:
temp2_df = temp2_df.reset_index()
temp2_df

Unnamed: 0,index,carpark_number,level_1,lots_available
0,0,A10,1249,
1,1,A10,2916,1.0
2,2,A10,4583,-1.0
3,3,A10,6250,0.0
4,4,A10,7917,1.0
5,5,A10,9584,-1.0
6,6,A100,1464,
7,7,A100,3131,1.0
8,8,A100,4798,-1.0
9,9,A100,6465,0.0


In [117]:
change_df = temp2_df.groupby(['carpark_number'], axis=0).mean().filter(items=['carpark_number','lots_available'])
change_df = change_df.rename(columns={'lots_available': 'change'})
change_df = change_df.reset_index()
change_df

Unnamed: 0,carpark_number,change
0,A10,0.0
1,A100,-0.6
2,A11,8.2
3,A12,2.0
4,A15,3.2
5,A2,-2.4
6,A20,4.8
7,A24,8.6
8,A25,-5.0
9,A27,-1.4


In [80]:
#temp_df = grouped.get_group(i)
temp1_df = grouped.get_group('A10')
temp1_df

Unnamed: 0,carpark_info,carpark_number,lot_type,lots_available,timestamp (SGT),total_lots,update_datetime
1249,"[{'total_lots': '62', 'lot_type': 'C', 'lots_a...",A10,C,13,2018-01-01 07:05:00,62,2018-01-01T07:08:26
2916,"[{'total_lots': '62', 'lot_type': 'C', 'lots_a...",A10,C,12,2018-01-01 07:35:00,62,2018-01-01T07:38:27
4583,"[{'total_lots': '62', 'lot_type': 'C', 'lots_a...",A10,C,13,2018-01-01 08:05:00,62,2018-01-01T08:08:28
6250,"[{'total_lots': '62', 'lot_type': 'C', 'lots_a...",A10,C,13,2018-01-01 08:35:00,62,2018-01-01T08:38:28
7917,"[{'total_lots': '62', 'lot_type': 'C', 'lots_a...",A10,C,12,2018-01-01 09:05:00,62,2018-01-01T09:08:29
9584,"[{'total_lots': '62', 'lot_type': 'C', 'lots_a...",A10,C,13,2018-01-01 09:35:00,62,2018-01-01T09:36:30


In [86]:
temp1_df['lots_available'].rolling(window=2).apply(lambda x: x[0] - x[1])

1249    NaN
2916    1.0
4583   -1.0
6250    0.0
7917    1.0
9584   -1.0
Name: lots_available, dtype: float64

In [73]:
temp1_df['change'] = temp1_df.shift(1)['lots_available']-temp1_df['lots_available']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [74]:
temp1_df

Unnamed: 0,carpark_info,carpark_number,lot_type,lots_available,timestamp (SGT),total_lots,update_datetime,change
1249,"[{'total_lots': '62', 'lot_type': 'C', 'lots_a...",A10,C,13,2018-01-01 07:05:00,62,2018-01-01T07:08:26,
2916,"[{'total_lots': '62', 'lot_type': 'C', 'lots_a...",A10,C,12,2018-01-01 07:35:00,62,2018-01-01T07:38:27,1.0
4583,"[{'total_lots': '62', 'lot_type': 'C', 'lots_a...",A10,C,13,2018-01-01 08:05:00,62,2018-01-01T08:08:28,-1.0
6250,"[{'total_lots': '62', 'lot_type': 'C', 'lots_a...",A10,C,13,2018-01-01 08:35:00,62,2018-01-01T08:38:28,0.0
7917,"[{'total_lots': '62', 'lot_type': 'C', 'lots_a...",A10,C,12,2018-01-01 09:05:00,62,2018-01-01T09:08:29,1.0
9584,"[{'total_lots': '62', 'lot_type': 'C', 'lots_a...",A10,C,13,2018-01-01 09:35:00,62,2018-01-01T09:36:30,-1.0


In [69]:
temp1_df.mean()

lots_available    12.666667
total_lots        62.000000
change             0.000000
dtype: float64

In [24]:
carparkmean_df = temp_df.groupby('carpark_number').mean()
carparkmean_df['date'] = date
carparkmean_df = carparkmean_df.reset_index()
carparkmean_df

Unnamed: 0,carpark_number,lots_available,total_lots,date
0,A10,12.666667,62.0,2018-01-01
1,A100,10.500000,67.0,2018-01-01
2,A11,148.833333,370.0,2018-01-01
3,A12,148.833333,384.0,2018-01-01
4,A15,12.833333,57.0,2018-01-01
5,A2,98.500000,187.0,2018-01-01
6,A20,58.166667,250.0,2018-01-01
7,A24,80.833333,230.0,2018-01-01
8,A25,129.833333,514.0,2018-01-01
9,A27,74.500000,251.0,2018-01-01


In [15]:
start_datetime = date.strftime("%Y-%m-%d") + ' 07:05:00'
end_datetime = date.strftime("%Y-%m-%d") + ' 09:35:00'


carpark_df = pd.DataFrame(columns=['lot_type', 'total_lots', 'lots_available', 'timestamp (SGT)'])
for dt in tqdm(pd.date_range(start_datetime, end_datetime, freq='30min')):  # I remember this was a wet day
    r = requests.get('https://api.data.gov.sg/v1/transport/carpark-availability',
                     params={'date_time': dt.strftime('%Y-%m-%dT%H:%M:%S')})
    try:
        temp_df = pd.DataFrame(r.json()['items'][0]['carpark_data'])
    except ValueError:
        print('No data JSONDecodeError {}'.format(dt))
        continue
    except KeyError:
        print('No items KeyError {}'.format(dt))
        continue
        #temp_df = temp_df.rename(columns={'lots_available': 'lots_available'})
    
        #temp_df['timestamp (SGT)'] = pd.to_datetime(r.json()['items'][0]['timestamp'].split('+')[0])
    temp_df['timestamp (SGT)'] = dt
    carpark_df = carpark_df.append(temp_df, ignore_index=True)
    carpark_df.head()
    


100%|██████████| 6/6 [00:07<00:00,  1.24s/it]


In [16]:
carpark_df.head()

Unnamed: 0,carpark_info,carpark_number,lot_type,lots_available,timestamp (SGT),total_lots,update_datetime
0,"[{'total_lots': '104', 'lot_type': 'C', 'lots_...",HE12,,,2018-01-01 07:05:00,,2018-01-01T07:03:13
1,"[{'total_lots': '583', 'lot_type': 'C', 'lots_...",HLM,,,2018-01-01 07:05:00,,2018-01-01T07:03:02
2,"[{'total_lots': '329', 'lot_type': 'C', 'lots_...",RHM,,,2018-01-01 07:05:00,,2018-01-01T07:03:03
3,"[{'total_lots': '110', 'lot_type': 'C', 'lots_...",BM29,,,2018-01-01 07:05:00,,2017-08-01T12:15:17
4,"[{'total_lots': '97', 'lot_type': 'C', 'lots_a...",Q81,,,2018-01-01 07:05:00,,2018-01-01T07:03:12


In [18]:
import json
carpark_df['lots_available']=''
df3 = pd.DataFrame(carpark_df['carpark_info'].values.tolist(), columns=['carpark_info1','carpark_info2','carpark_info3'])
df3['carpark_info1'] = df3['carpark_info1'].apply(json.dumps)
temp_df1, temp_df2, temp_df3 = df3['carpark_info1'].str.split(',').str
    
temp_df1.apply(extract_number).head()
temp_df2.apply(extract_alpha).head()
temp_df3.apply(extract_number).head()

carpark_df['lot_type'] = temp_df2.apply(extract_alpha)
carpark_df['total_lots'] = temp_df1.apply(extract_number)
carpark_df['lots_available'] = temp_df3.apply(extract_number)

In [None]:
temp_df1.apply(extract_number).head()
temp_df2.apply(extract_alpha).head()
temp_df3.apply(extract_number).head()

carpark_df['lot_type'] = temp_df2.apply(extract_alpha)
carpark_df['total_lots'] = temp_df1.apply(extract_number)
carpark_df['lots_available'] = temp_df3.apply(extract_number)
#carpark_df.head()

outer_df = pd.merge(carpark_df, info_df, how='outer', on=['carpark_number'])
outer_df.head(15)

In [None]:
for date in tqdm(date_list):
    temp_df = rain_data(date)
    rainfallmean_df = rainfallmean_df.append(temp_df, ignore_index=True)

rainfallmean_df

# Union of wind-speed and station data
outer_df = pd.merge(rainfallmean_df, stations_df, how='inner', on=['station_id'])
outer_df.head(15)

filename = 'carparkmean_{}to{}.csv'.format(start_date, end_date)
filename

outer_df.to_csv(filename, index = False)