In [1]:
from config import API_Key
import pandas as pd
import requests
import json
from citipy import citipy
import numpy as np

In [2]:
# Query the AirVisual "nearest_station" endpoint for a single fixed coordinate
# and pretty-print the JSON reply.
lat=35.68
lon=139.77
# HTTPS keeps the API key out of clear-text traffic; passing the query via
# `params` lets requests do the URL encoding.
url="https://api.airvisual.com/v2/nearest_station"
# A timeout stops the cell from hanging indefinitely if the service does not answer.
response=requests.get(url,params={"lat":lat,"lon":lon,"key":API_Key},timeout=30)
data=response.json()
# The body is printed even for a failed call: the API reports errors as JSON
# with a "status" field, which is what we want to inspect here.
print(json.dumps(data,indent=4,sort_keys=True))

{
    "data": {
        "message": "permission_denied (you don't have access to this endpoint"
    },
    "status": "fail"
}


In [3]:
# Query the AirVisual "city" endpoint for Los Angeles, California, USA and
# pretty-print the JSON reply.
# HTTPS keeps the API key out of clear-text traffic; `params` handles the
# encoding of the space in the city name.
url = "https://api.airvisual.com/v2/city"
params = {"city":"Los Angeles","state":"California","country":"USA","key":API_Key}
# A timeout stops the cell from hanging indefinitely if the service does not answer.
response=requests.get(url,params=params,timeout=30)
data=response.json()
# sort_keys is a boolean flag (the original passed the integer 4 by mistake).
print(json.dumps(data,indent=4,sort_keys=True))

{
    "data": {
        "city": "Los Angeles",
        "country": "USA",
        "current": {
            "pollution": {
                "aqicn": 30,
                "aqius": 53,
                "maincn": "p1",
                "mainus": "p2",
                "ts": "2022-06-23T11:00:00.000Z"
            },
            "weather": {
                "hu": 74,
                "ic": "03n",
                "pr": 1012,
                "tp": 19,
                "ts": "2022-06-23T11:00:00.000Z",
                "wd": 220,
                "ws": 2.06
            }
        },
        "location": {
            "coordinates": [
                -118.2417,
                34.0669
            ],
            "type": "Point"
        },
        "state": "California"
    },
    "status": "success"
}


In [4]:
# Load the climate classification table from CSV.
# Later cells read its "Lat", "Lon" and "Cls" columns.
filepath="Climate_Data/ClimateClassifications.csv"
climate_df=pd.read_csv(filepath_or_buffer=filepath)
climate_df

Unnamed: 0,Lat,Lon,Cls
0,-89.75,-179.75,EF
1,-89.75,-179.25,EF
2,-89.75,-178.75,EF
3,-89.75,-178.25,EF
4,-89.75,-177.75,EF
...,...,...,...
92411,83.75,-30.75,ET
92412,83.75,-30.25,ET
92413,83.75,-29.75,ET
92414,83.75,-29.25,ET


In [5]:
#create list of nearest city to each row of climate dataframe
nearest_city=[]
# Take a real copy: a plain assignment would only alias climate_df, so any
# column added to nearest_city_df later would silently appear on climate_df too.
nearest_city_df=climate_df.copy()
# Walk the two coordinate columns in lockstep instead of doing a label lookup
# per row; this is faster and does not depend on the index being 0..n-1.
for lat,lon in zip(nearest_city_df["Lat"],nearest_city_df["Lon"]):
    city=citipy.nearest_city(lat,lon)
    nearest_city.append(city.city_name)
    
    


In [7]:
#create new dataframe with column showing nearest city to each lat/lon
# assign() returns a new frame, so nearest_city_df is left untouched and
# re-running this cell always starts from the same input.
# The ** form is needed because the column name contains a space.
climate_cities=nearest_city_df.assign(**{"Nearest City":nearest_city})
# Save the full grid-point -> nearest-city table (without the row index).
climate_cities.to_csv("Climate_Data/Nearest_Cities.csv",index=False)
climate_cities

Unnamed: 0,Lat,Lon,Cls,Nearest City
0,-89.75,-179.75,EF,vaini
1,-89.75,-179.25,EF,vaini
2,-89.75,-178.75,EF,vaini
3,-89.75,-178.25,EF,vaini
4,-89.75,-177.75,EF,vaini
...,...,...,...,...
92411,83.75,-30.75,ET,illoqqortoormiut
92412,83.75,-30.25,ET,illoqqortoormiut
92413,83.75,-29.75,ET,illoqqortoormiut
92414,83.75,-29.25,ET,illoqqortoormiut


In [8]:
# Collapse the grid-point table to one row per city name, keeping only the
# city and its climate classification, then save and display the result.
# NOTE(review): keep='first' retains whichever grid point appears first in
# climate_cities for a given city, so the stored climate is that of the first
# matching grid point, not necessarily the one closest to the city - confirm
# this is the intended choice.
cities_shortlist_df=(
    climate_cities
    .drop_duplicates(subset=["Nearest City"],keep='first')
    .drop(columns=["Lat","Lon"])
    .reset_index(drop=True)
    .rename(columns={"Nearest City":"City","Cls":"Climate"})
    .loc[:,["City","Climate"]]
)
cities_shortlist_df.to_csv("Climate_Data/Nearest_Cities_Shortlist.csv",index=False)
cities_shortlist_df

Unnamed: 0,City,Climate
0,vaini,EF
1,mataura,EF
2,rikitea,EF
3,punta arenas,EF
4,ushuaia,EF
...,...,...
14939,dikson,ET
14940,qaanaaq,ET
14941,longyearbyen,ET
14942,narsaq,ET


In [9]:
# Load the world cities reference table.
world_cities_df=pd.read_csv("Climate_Data/world_cities.csv")

# Reduce every object-dtype (text) column to plain ASCII: NFKD normalisation
# decomposes accented characters, and encoding to ASCII with errors='ignore'
# then drops every character that is not ASCII.
cols = world_cities_df.select_dtypes(include=[object]).columns
for col in cols:
    world_cities_df[col] = (
        world_cities_df[col]
        .str.normalize('NFKD')
        .str.encode('ascii', errors='ignore')
        .str.decode('utf-8')
    )

# Lower-case the city name so it can be compared with lower-case names elsewhere.
world_cities_df['name']=world_cities_df['name'].str.lower()
world_cities_df

Unnamed: 0,name,country,subcountry,geonameid
0,les escaldes,Andorra,Escaldes-Engordany,3040051
1,andorra la vella,Andorra,Andorra la Vella,3041563
2,umm al qaywayn,United Arab Emirates,Umm al Qaywayn,290594
3,ras al-khaimah,United Arab Emirates,Ras al Khaymah,291074
4,khawr fakkan,United Arab Emirates,Ash Shariqah,291696
...,...,...,...,...
23013,bulawayo,Zimbabwe,Bulawayo,894701
23014,bindura,Zimbabwe,Mashonaland Central,895061
23015,beitbridge,Zimbabwe,Matabeleland South,895269
23016,epworth,Zimbabwe,Harare,1085510


In [23]:
# Reload the city/climate shortlist from disk and attach country, subcountry
# and geonameid from world_cities_df by matching on the city name.
# keep_default_na=False with na_values=[""] treats only empty fields as
# missing; with the defaults, city names that happen to read "nan", "null",
# "na", ... would be parsed as NaN and could never match.
cities_shortlist_df=pd.read_csv(
    "Climate_Data/Nearest_Cities_Shortlist.csv",
    keep_default_na=False,
    na_values=[""],
)
# Default inner join: only cities present in both tables are kept.
# NOTE(review): the match is on name alone, so a shortlist city is paired with
# every same-named row in world_cities_df regardless of country - confirm this
# is acceptable.
complete_df=pd.merge(cities_shortlist_df,world_cities_df,left_on='City',right_on='name')
# 'name' duplicates 'City' after the merge, so drop it and use lower-case headers.
complete_df=complete_df.drop(columns=["name"])
complete_df=complete_df.rename(columns={"City":"city","Climate":"climate"})
# index=False matches the other saves in this notebook and avoids writing the
# row index as an extra unnamed column.
complete_df.to_csv("Clean_Data/CityCountryClimate.csv",index=False)
complete_df

Unnamed: 0,city,climate,country,subcountry,geonameid
0,punta arenas,EF,Chile,Magallanes,3874787
1,ushuaia,EF,Argentina,Tierra del Fuego,3833367
2,hermanus,EF,South Africa,Western Cape,3366880
3,kruisfontein,EF,South Africa,Eastern Cape,986717
4,port elizabeth,EF,South Africa,Eastern Cape,964420
...,...,...,...,...,...
7686,severomorsk,Dfc,Russia,Murmansk,496278
7687,norilsk,Dfc,Russia,Krasnoyarskiy,1497337
7688,murmansk,Dfc,Russia,Murmansk,524305
7689,polyarnyy,Dfc,Russia,Murmansk,506763


In [24]:
# Load the per-city air quality / water pollution table.
# Forward slash instead of backslash: "\c" is an invalid escape sequence and a
# backslash separator only resolves on Windows, while "/" works on every
# platform and matches the other paths in this notebook.
# NOTE(review): the column headers appear to be read with literal quote
# characters; read_csv(..., skipinitialspace=True) may parse them cleanly -
# verify against the raw file before changing, since it renames the columns.
cities_water_air_df=pd.read_csv("Water_Air_Data/cities_air_quality_water_pollution.18-10-2021.csv")
cities_water_air_df


Unnamed: 0,City,"""Region""","""Country""","""AirQuality""","""WaterPollution"""
0,New York City,"""New York""","""United States of America""",46.816038,49.504950
1,"Washington, D.C.","""District of Columbia""","""United States of America""",66.129032,49.107143
2,San Francisco,"""California""","""United States of America""",60.514019,43.000000
3,Berlin,"""""","""Germany""",62.364130,28.612717
4,Los Angeles,"""California""","""United States of America""",36.621622,61.299435
...,...,...,...,...,...
3958,Yanbu,"""Medina Province""","""Saudi Arabia""",0.000000,50.000000
3959,Cordoba,"""Andalusia""","""Spain""",85.714286,8.333333
3960,Vic,"""Catalonia""","""Spain""",100.000000,0.000000
3961,Segovia,"""Castile and Leon""","""Spain""",100.000000,0.000000


In [28]:
# Give the air/water table the same lower-case 'city' key that complete_df
# uses. The frame is rebuilt and rebound rather than modified in place, so the
# cell gives the same result when re-run (rename ignores an already-renamed
# column).
cities_water_air_df=(
    cities_water_air_df
    .rename(columns={'City':'city'})
    .assign(city=lambda frame: frame['city'].str.lower())
)
# Default how='inner': only cities present in both tables survive the merge.
# NOTE(review): the join is on city name only, so same-named cities in
# different countries are paired with each other - confirm this is acceptable.
final_df=pd.merge(complete_df,cities_water_air_df,on='city')
final_df

Unnamed: 0,city,climate,country,subcountry,geonameid,"""Region""","""Country""","""AirQuality""","""WaterPollution"""
0,punta arenas,EF,Chile,Magallanes,3874787.0,,,,
1,ushuaia,EF,Argentina,Tierra del Fuego,3833367.0,,,,
2,hermanus,EF,South Africa,Western Cape,3366880.0,,,,
3,kruisfontein,EF,South Africa,Eastern Cape,986717.0,,,,
4,port elizabeth,EF,South Africa,Eastern Cape,964420.0,,,,
...,...,...,...,...,...,...,...,...,...
11649,Jubail,,,,,"""Eastern Province""","""Saudi Arabia""",30.46875,38.793103
11650,Yanbu,,,,,"""Medina Province""","""Saudi Arabia""",0.00000,50.000000
11651,Vic,,,,,"""Catalonia""","""Spain""",100.00000,0.000000
11652,Segovia,,,,,"""Castile and Leon""","""Spain""",100.00000,0.000000
