In [1]:
import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine, types
from sqlalchemy.dialects.postgresql import JSON as postgres_json
from sqlalchemy_utils import database_exists, create_database

In [17]:
import os
import requests
import datetime
import json

In [3]:
from dotenv import dotenv_values


In [6]:
# load your .env file and read all variables you need for the db connection and for weather api

config = dotenv_values("token.env")

username = config['POSTGRES_USER']
password = config['POSTGRES_PW']
host = config['POSTGRES_HOST']
port = config['POSTGRES_PORT']
db_climate = config['DB_CLIMATE']

In [7]:
url = f'postgresql://{username}:{password}@{host}:{port}/{db_climate}'

In [9]:
# create the engine

engine = create_engine(url, echo=True)

In [10]:
engine.url 

postgresql://postgres:***@34.89.241.139:5432/climate

In [15]:
# Add the code you developed in the previous encounter

locations = ['Shanghai','Manila','Perth','Singapore']

weather_api_key = config['weatherapi']

In [18]:
weather_dict = {'extracted_at':[], 'extracted_data':[]}

for city in locations:
     for day in pd.date_range(start='06/20/2023', end='06/22/2023'):
         requested_day = day.date()
         print(city, requested_day)
         api_url = f'http://api.weatherapi.com/v1/history.json?key={weather_api_key}&q={city}&dt={requested_day}'
         response = requests.request("GET", api_url)
         if response.status_code == 200:
            print(f'attempt for {day.date()} in {city} resulted in {response.status_code}', end='\r')
            dt = datetime.datetime.now() 
            dt_str = dt.strftime("%Y-%m-%d %H:%M:%S") 
            weather_dict['extracted_at'].append(dt_str)
            weather_dict['extracted_data'].append(json.loads(response.text))
         else:
            print(f'for date: {day.date()} and city: {city} status code {response.status_code} -> research error')

Shanghai 2023-06-20
Shanghai 2023-06-21-20 in Shanghai resulted in 200
Shanghai 2023-06-22-21 in Shanghai resulted in 200
Manila 2023-06-2006-22 in Shanghai resulted in 200
Manila 2023-06-2106-20 in Manila resulted in 200
Manila 2023-06-2206-21 in Manila resulted in 200
Perth 2023-06-20-06-22 in Manila resulted in 200
Perth 2023-06-21-06-20 in Perth resulted in 200
Perth 2023-06-22-06-21 in Perth resulted in 200
Singapore 2023-06-2022 in Perth resulted in 200
Singapore 2023-06-2120 in Singapore resulted in 200
Singapore 2023-06-2221 in Singapore resulted in 200
attempt for 2023-06-22 in Singapore resulted in 200

In [19]:
# weather_dict is saved as a json file (for backup)

json_data = json.dumps(weather_dict) 

with open('weather_dict.json',mode='w') as f:
    f.write(json.dumps(weather_dict))

In [20]:
# create a dataframe from it

weather_dict_df = pd.DataFrame(weather_dict)

In [26]:
weather_dict_df.head()

Unnamed: 0,extracted_at,extracted_data
0,2024-06-11 15:08:35,"{'location': {'name': 'Shanghai', 'region': 'S..."
1,2024-06-11 15:08:35,"{'location': {'name': 'Shanghai', 'region': 'S..."
2,2024-06-11 15:08:35,"{'location': {'name': 'Shanghai', 'region': 'S..."
3,2024-06-11 15:08:35,"{'location': {'name': 'Manila', 'region': 'Man..."
4,2024-06-11 15:08:35,"{'location': {'name': 'Manila', 'region': 'Man..."


In [23]:
# 5. Define data types for the table in DB (this is why we imported types and postgres_json)

dtype_dict = {'extracted_at':types.DateTime, 'extracted_data':postgres_json}

In [24]:
# 6. Using pandas method .to_sql send the content of the dataframe to the table weather_raw in your climate database. 

weather_dict_df.to_sql('weather_raw', engine, if_exists='replace', dtype=dtype_dict)

2024-06-11 15:15:15,798 INFO sqlalchemy.engine.Engine select pg_catalog.version()
2024-06-11 15:15:15,800 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-06-11 15:15:15,851 INFO sqlalchemy.engine.Engine select current_schema()
2024-06-11 15:15:15,852 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-06-11 15:15:15,891 INFO sqlalchemy.engine.Engine show standard_conforming_strings
2024-06-11 15:15:15,893 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-06-11 15:15:15,933 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-06-11 15:15:15,940 INFO sqlalchemy.engine.Engine SELECT pg_catalog.pg_class.relname 
FROM pg_catalog.pg_class JOIN pg_catalog.pg_namespace ON pg_catalog.pg_namespace.oid = pg_catalog.pg_class.relnamespace 
WHERE pg_catalog.pg_class.relname = %(table_name)s AND pg_catalog.pg_class.relkind = ANY (ARRAY[%(param_1)s, %(param_2)s, %(param_3)s, %(param_4)s, %(param_5)s]) AND pg_catalog.pg_table_is_visible(pg_catalog.pg_class.oid) AND pg_catalog.pg_namespace.nspname != %(nspname

12