In [1]:
import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine, types
from sqlalchemy.dialects.postgresql import JSON as postgres_json
from sqlalchemy_utils import database_exists, create_database

In [2]:
import os
import requests
import datetime
import json

In [3]:
from dotenv import dotenv_values


In [4]:
# load your .env file and read all variables you need for the db connection and for weather api

config = dotenv_values("token.env")

username = config['POSTGRES_USER']
password = config['POSTGRES_PW']
host = config['POSTGRES_HOST']
port = config['POSTGRES_PORT']
db_climate = config['DB_CLIMATE']

In [5]:
url = f'postgresql://{username}:{password}@{host}:{port}/{db_climate}'

In [6]:
# create the engine

engine = create_engine(url, echo=True)

In [7]:
engine.url 

postgresql://postgres:***@34.89.241.139:5432/climate

In [8]:
# Add the code you developed in the previous encounter

locations = ['Shanghai','Manila','Perth','Singapore','Accra','Lima','Winnipeg']

weather_api_key = config['weatherapi']

In [15]:
weather_dict = {'extracted_at':[], 'extracted_data':[]}

for city in locations:
     for day in pd.date_range(start='07/20/2023', end='10/22/2023'):
         requested_day = day.date()
         print(city, requested_day)
         api_url = f'http://api.weatherapi.com/v1/history.json?key={weather_api_key}&q={city}&dt={requested_day}'
         response = requests.request("GET", api_url)
         if response.status_code == 200:
            print(f'attempt for {day.date()} in {city} resulted in {response.status_code}', end='\r')
            dt = datetime.datetime.now() 
            dt_str = dt.strftime("%Y-%m-%d %H:%M:%S") 
            weather_dict['extracted_at'].append(dt_str)
            weather_dict['extracted_data'].append(json.loads(response.text))
         else:
            print(f'for date: {day.date()} and city: {city} status code {response.status_code} -> research error')

Shanghai 2023-07-20
Shanghai 2023-07-21-20 in Shanghai resulted in 200
Shanghai 2023-07-22-21 in Shanghai resulted in 200
Shanghai 2023-07-23-22 in Shanghai resulted in 200
Shanghai 2023-07-24-23 in Shanghai resulted in 200
Shanghai 2023-07-25-24 in Shanghai resulted in 200
Shanghai 2023-07-26-25 in Shanghai resulted in 200
Shanghai 2023-07-27-26 in Shanghai resulted in 200
Shanghai 2023-07-28-27 in Shanghai resulted in 200
Shanghai 2023-07-29-28 in Shanghai resulted in 200
Shanghai 2023-07-30-29 in Shanghai resulted in 200
Shanghai 2023-07-31-30 in Shanghai resulted in 200
Shanghai 2023-08-01-31 in Shanghai resulted in 200
Shanghai 2023-08-02-01 in Shanghai resulted in 200
Shanghai 2023-08-03-02 in Shanghai resulted in 200
Shanghai 2023-08-04-03 in Shanghai resulted in 200
Shanghai 2023-08-05-04 in Shanghai resulted in 200
Shanghai 2023-08-06-05 in Shanghai resulted in 200
Shanghai 2023-08-07-06 in Shanghai resulted in 200
Shanghai 2023-08-08-07 in Shanghai resulted in 200
Shanghai 20

In [16]:
# weather_dict is saved as a json file (for backup)

json_data = json.dumps(weather_dict) 

with open('weather_dict.json',mode='w') as f:
    f.write(json.dumps(weather_dict))

In [17]:
# create a dataframe from it

weather_dict_df = pd.DataFrame(weather_dict)

In [18]:
weather_dict_df.head()

Unnamed: 0,extracted_at,extracted_data
0,2024-06-14 14:53:08,"{'location': {'name': 'Shanghai', 'region': 'S..."
1,2024-06-14 14:53:09,"{'location': {'name': 'Shanghai', 'region': 'S..."
2,2024-06-14 14:53:11,"{'location': {'name': 'Shanghai', 'region': 'S..."
3,2024-06-14 14:53:12,"{'location': {'name': 'Shanghai', 'region': 'S..."
4,2024-06-14 14:53:14,"{'location': {'name': 'Shanghai', 'region': 'S..."


In [19]:
# 5. Define data types for the table in DB (this is why we imported types and postgres_json)

dtype_dict = {'extracted_at':types.DateTime, 'extracted_data':postgres_json}

In [20]:
# 6. Using pandas method .to_sql send the content of the dataframe to the table weather_raw in your climate database. 

weather_dict_df.to_sql('weather_raw', engine, if_exists='replace', dtype=dtype_dict)

2024-06-14 14:56:37,857 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-06-14 14:56:37,861 INFO sqlalchemy.engine.Engine SELECT pg_catalog.pg_class.relname 
FROM pg_catalog.pg_class JOIN pg_catalog.pg_namespace ON pg_catalog.pg_namespace.oid = pg_catalog.pg_class.relnamespace 
WHERE pg_catalog.pg_class.relname = %(table_name)s AND pg_catalog.pg_class.relkind = ANY (ARRAY[%(param_1)s, %(param_2)s, %(param_3)s, %(param_4)s, %(param_5)s]) AND pg_catalog.pg_table_is_visible(pg_catalog.pg_class.oid) AND pg_catalog.pg_namespace.nspname != %(nspname_1)s
2024-06-14 14:56:37,862 INFO sqlalchemy.engine.Engine [cached since 937.6s ago] {'table_name': 'weather_raw', 'param_1': 'r', 'param_2': 'p', 'param_3': 'f', 'param_4': 'v', 'param_5': 'm', 'nspname_1': 'pg_catalog'}
2024-06-14 14:56:37,895 INFO sqlalchemy.engine.Engine SELECT pg_catalog.pg_class.relname 
FROM pg_catalog.pg_class JOIN pg_catalog.pg_namespace ON pg_catalog.pg_namespace.oid = pg_catalog.pg_class.relnamespace 
WHERE pg_catalo

665