In [3]:
  
import requests
import json
import pandas as pd
import config
import matplotlib.pyplot as plt
import datetime
from csv import writer


def gen_point_data(name, lat, lon, t_start, t_end):
    '''
    Generate particulate PM2.5 data for a lat/lon point over a set time period and write to csv
    @params: 
        name: name of location
        lat, lon: latitude and longitude in decimal degrees
        t_start, t_end: starting and ending epoch in Unix time
    '''

    # Connect to endpoint and load data
    endpoint = 'http://api.openweathermap.org/data/2.5/air_pollution/history?lat={LAT}&lon={LON}&start={START}&end={END}&appid={KEY}'.format(
        LAT=lat, 
        LON=lon,
        START=t_start,
        END=t_end,
        KEY=config.OPEN_WEATHER_KEY
    )
    page = requests.get(url=endpoint)
    content = json.loads(page.content)
    df = pd.json_normalize(content)

    # List all records
    ls = df['list'][0]
    df_size = len(ls)
    
    # Take daily averages of PM2.5 particulate and add to list
    pm_list = []
    pm_count = 0
    for i in range(df_size):
        pm_count+=ls[i]['components']['pm2_5']
        # Average for each day
        if (i%24 == 0):   
            pm_daily = round(pm_count/24, 5)
            pm_list.append(pm_daily)
            pm_count = 0
    return pm_list

### Retrieve data for list of cities ###
city_df = pd.read_csv('C:\\Users\\Kurly\\Downloads\\Universal-Embeddings-Nick\\Universal-Embeddings-Nick\\data\\city_lat_lon.csv')
city_count = 5 # Actual: len(city_df)

# Start and ending times. Testing for Dec 2020
T_START = 1606853919
T_END = 1609445919

# Derive number of entries from start and end
# Change in epoch to number of hours gets us total entries
num_entries = int((T_END - T_START) / 86400)
time_step = T_START

# Get list of column names based on the number of entries (each hour of data will be one column)
col_names = ['city', 'lat', 'lon']
for i in range(num_entries):
    # Convert daily interval to human-readable
    timedate = datetime.datetime.fromtimestamp(time_step)
    time_string = timedate.strftime('pm25_%Y_%m_%d')
    # Increment time_step
    time_step+=86400
    # Append col to list\
    col_names.append(time_string)

# Write entry to file
with open('C:\\Users\\Kurly\\Downloads\\Universal-Embeddings-Nick\\Universal-Embeddings-Nick\\data\\geocoded-cities-master.csv', 'w', newline='') as f_open:
    writer_obj = writer(f_open)
    # Write header
    writer_obj.writerow(col_names)

    # Loop through all 28,000+ cities and retrieve data
    for i in range(city_count):
        city_name = city_df.iloc[i][0]
        city_lat = city_df.iloc[i][1]
        city_lon = city_df.iloc[i][2]
        city_info=[city_name, city_lat, city_lon]
        # Retrieve particulate list; write row to csv
        entry = gen_point_data(name=city_name, lat=city_lat, lon=city_lon, t_start=T_START, t_end=T_END)
        city_info+=entry
        writer_obj.writerow(city_info)

    f_open.close()

one=pd.read_csv('C:\\Users\\Kurly\\Downloads\\Universal-Embeddings-Nick\\Universal-Embeddings-Nick\\data\\geocoded-cities-master.csv')
one


Unnamed: 0,city,lat,lon,pm25_2020_12_01,pm25_2020_12_02,pm25_2020_12_03,pm25_2020_12_04,pm25_2020_12_05,pm25_2020_12_06,pm25_2020_12_07,...,pm25_2020_12_21,pm25_2020_12_22,pm25_2020_12_23,pm25_2020_12_24,pm25_2020_12_25,pm25_2020_12_26,pm25_2020_12_27,pm25_2020_12_28,pm25_2020_12_29,pm25_2020_12_30
0,NewYork,40.6943,-73.9249,0.06333,1.25,6.60583,10.16167,23.55542,1.67875,6.76292,...,64.78167,31.58792,13.93792,4.60667,5.02625,1.92375,7.7125,13.02917,7.46583,14.29583
1,LosAngeles,34.1139,-118.4068,0.26208,15.33708,5.58375,9.15917,15.93083,29.39667,34.86375,...,10.43583,63.41458,71.14708,4.24125,7.44625,30.32875,32.29708,21.86917,9.13708,6.36917
2,Chicago,41.8373,-87.6862,0.17417,14.31125,13.53667,14.50958,4.03958,7.95333,17.18,...,11.7875,5.2675,4.08042,2.15,2.695,5.51958,21.53875,7.33042,15.575,5.75417
3,Miami,25.7839,-80.2102,0.23833,6.65292,3.52458,3.1275,14.09333,11.9075,4.68583,...,6.15708,8.95667,5.82833,4.47417,4.46958,12.63208,4.15125,4.09792,2.25417,3.5125
4,Dallas,32.7936,-96.7662,0.08917,4.20583,4.13708,5.55083,24.70333,15.11208,6.66375,...,15.595,14.36208,6.41958,1.05417,11.8375,9.21792,4.89542,8.30708,4.91667,3.7225
