## Load Libraries

In [1]:
import pandas as pd
import numpy as np
from pandas import Timestamp
import os
from datetime import datetime, timedelta

## Read the JSON files from directory

In [2]:
# Read the directories with the data and save file_names in two list
path_to_weather = 'python_data/weather_data/'

json_files_weather = [single_json for single_json in os.listdir(path_to_weather) if single_json.endswith('.json')]

In [3]:
json_files_weather[:5]

['Weather_Data_2017_07_05_08_45_59.json',
 'Weather_Data_2017_07_05_11_18_10.json',
 'Weather_Data_2017_07_05_13_27_30.json',
 'Weather_Data_2017_07_05_17_11_14.json',
 'Weather_Data_2017_07_05_19_43_32.json']

In [16]:
#Check a random json to see how it looks like
with open(path_to_weather+json_files_weather[18]) as json_data:
    d = json.load(json_data)
d

{u'base': u'stations',
 u'clouds': {u'all': 76},
 u'cod': 200,
 u'coord': {u'lat': 56.38, u'lon': 24.64},
 u'dt': 1499613613,
 u'id': 600438,
 u'main': {u'grnd_level': 1018.46,
  u'humidity': 87,
  u'pressure': 1018.46,
  u'sea_level': 1024.73,
  u'temp': 292.488,
  u'temp_max': 292.488,
  u'temp_min': 292.488},
 u'name': u'Birzai',
 u'sys': {u'country': u'LT',
  u'message': 0.0028,
  u'sunrise': 1499564779,
  u'sunset': 1499627185},
 u'weather': [{u'description': u'broken clouds',
   u'icon': u'04d',
   u'id': 803,
   u'main': u'Clouds'}],
 u'wind': {u'deg': 255.51, u'speed': 2.12}}

In [5]:
d['weather'][0]['icon']

u'04d'

## Useful Functions

In [6]:
# Create a function that gets the date from filename title
def getDate(s):
    first_split = s.split('_')[2]
    second_split = s.split('_')[3]
    third_split = s.split('_')[4]
    return first_split + '-' + second_split + '-' + third_split

In [7]:
# Create a function that gets the time from filename title
def getTime(s):
    first_split = s.split('_')[5]
    second_split = s.split('_')[6]
    third_split = s.split('_')[7][:-5]
    return first_split + ':' + second_split + ':' + third_split

In [8]:
# Create a good format for the Date column
def dateConverter(s):
    # Set date formats
    time_format = "%Y-%m-%d"

    # Convert from str to datetime
    converted = datetime.strptime(s,time_format)
    
    return converted

In [9]:
testDate = getDate(json_files_weather[0])

In [10]:
testTime = getTime(json_files_weather[0])

In [11]:
testDate

'2017-07-05'

In [23]:
# Initiate a new dataframe
df = pd.DataFrame()

# Parse json weather files data
for i,filename in enumerate(json_files_weather):
    with open(path_to_weather+filename) as json_data:
        d = json.load(json_data)
        df.set_value(i,'date',dateConverter(getDate(filename)))
        df.set_value(i,'time',getTime(filename))
        df.set_value(i,'windSpeed',d['wind']['speed'] * 3.6)
        # Some of the stations didn't give degree after calling the API
        if len(d['wind'])>1:
            df.set_value(i,'windDeg',d['wind']['deg'])
        else:
            df.set_value(i,'windDeg',0)
            
        df.set_value(i,'humidity%',d['main']['humidity'])
        df.set_value(i,'tempC',d['main']['temp'] - 272.15)
        df.set_value(i,'icon',d['weather'][0]['icon'])
        df.set_value(i,'desc',d['weather'][0]['description'])
        df.set_value(i,'lon', d['coord']['lon'])
        df.set_value(i,'lat', d['coord']['lat'])

In [27]:
df[:5]

Unnamed: 0,date,time,windSpeed,windDeg,humidity%,tempC,icon,desc,lon,lat
0,2017-07-05,08:45:59,27.72,290.0,82.0,14.0,04d,overcast clouds,24.8,59.42
1,2017-07-05,11:18:10,25.92,300.0,62.0,16.0,02d,few clouds,24.7,59.44
2,2017-07-05,13:27:30,31.32,280.0,62.0,16.0,01d,clear sky,24.62,59.44
3,2017-07-05,17:11:14,33.48,280.0,62.0,16.0,01d,clear sky,24.38,59.45
4,2017-07-05,19:43:32,31.32,280.0,58.0,15.0,01d,clear sky,24.12,59.34


## Add day# and iteration number on the table above.

In [26]:
# Create helper dataframe with unique days
df_helper = df.groupby(by=['date','time']).count().iloc[:,0].reset_index().filter(items=['date','time'])

# Create columns for iter_no(per activity) and day_no(per day)
days = list(set(df_helper['date']))
days.sort()
day_no = list()
iter_no = list()
for index,day in enumerate(days):
    counter=1
    for dfday in df_helper['date']:
        if dfday == day:
            iter_no.append(counter)
            day_no.append(index+1)
            counter+=1

df_helper['day_no'] = pd.Series(day_no).values
df_helper['iter_no'] = pd.Series(iter_no).values      

df_helper[:5]

Unnamed: 0,date,time,day_no,iter_no
0,2017-07-05,08:45:59,1,1
1,2017-07-05,11:18:10,1,2
2,2017-07-05,13:27:30,1,3
3,2017-07-05,17:11:14,1,4
4,2017-07-05,19:43:32,1,5


In [28]:
# Merge the above tables
df = pd.merge(df,df_helper,on=['date','time'])

# Check main df
df.head()

Unnamed: 0,date,time,windSpeed,windDeg,humidity%,tempC,icon,desc,lon,lat,day_no,iter_no
0,2017-07-05,08:45:59,27.72,290.0,82.0,14.0,04d,overcast clouds,24.8,59.42,1,1
1,2017-07-05,11:18:10,25.92,300.0,62.0,16.0,02d,few clouds,24.7,59.44,1,2
2,2017-07-05,13:27:30,31.32,280.0,62.0,16.0,01d,clear sky,24.62,59.44,1,3
3,2017-07-05,17:11:14,33.48,280.0,62.0,16.0,01d,clear sky,24.38,59.45,1,4
4,2017-07-05,19:43:32,31.32,280.0,58.0,15.0,01d,clear sky,24.12,59.34,1,5


# Save it to .csv

In [30]:
# Save it to a csv for D3
df.to_csv('../app/interactive_map/weather.csv', index=False)