**Get data from WEB**

In [14]:
import requests
import pandas as pd
from datetime import datetime

#Replace these with the coordinates for Prague and your actual API key
#Prague coordinates
latitude = 50.0755
longitude = 14.4378

#Get the current date and time
current_date_time = datetime.now()

#Extract the current date
current_date = current_date_time.date()



endpoint = 'https://archive-api.open-meteo.com/v1/era5'

#Specify the start and end dates for the historical data
start_date = '2021-01-01'
end_date = current_date

#Specify the variable you want to retrieve (e.g., temperature_2m)
variable = 'temperature_2m'

#Construct the API request URL with parameters
url = f'{endpoint}?latitude={latitude}&longitude={longitude}&start_date={start_date}&end_date={end_date}&hourly={variable}'

#Make the API request
response = requests.get(url)

#Check if the request was successful (status code 200)
if response.status_code == 200:
    data = response.json()
    
    #Access the hourly data from JSON
    hourly_data = data.get('hourly', {})

    #Access the time and temperature from JSON
    time = hourly_data.get('time', [])
    temperature_2m = hourly_data.get(variable, [])

    #Create a DataFrame and save only 2 columns
    df=pd.DataFrame(zip(time, temperature_2m), columns=['date', 'temperature'])
    
else:
    print(f'Error: {response.status_code}, {response.text}')


**Save file in GCP**

In [15]:
from datetime import datetime
import pandas as pd
import os
from google.cloud import storage
from dotenv import load_dotenv

#Load environment variables from .env file
load_dotenv()

#Get environment variables from .env file
keyfile_path = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
project_id = os.environ.get("GOOGLE_CLOUD_PROJECT")

#Define the GCP bucket
client = storage.Client.from_service_account_json(keyfile_path)
bucket_name = "project-inputs"
bucket = client.get_bucket(bucket_name)

# Get current date and time
current_datetime = datetime.now()
formatted_datetime = current_datetime.strftime("%Y-%m-%d_%H-%M-%S")
file_name = "data_" + formatted_datetime + ".csv"

#Save data into csv
csv_data=df.to_csv(index=False)

#export csv into a bucket
blob = bucket.blob(file_name)
blob.upload_from_string(csv_data)

#get confirmation
print(f'Data saved to GCS bucket: gs://{bucket_name}/{file_name}')

Data saved to GCS bucket: gs://project-inputs/data_2023-12-10_11-53-30.csv


**Clean data**

In [16]:
df.head(5)

Unnamed: 0,date,temperature
0,2021-01-01T00:00,-2.5
1,2021-01-01T01:00,-2.1
2,2021-01-01T02:00,-1.6
3,2021-01-01T03:00,-1.6
4,2021-01-01T04:00,-3.1


In [21]:
#convert the column into a datetime
df['date'] = pd.to_datetime(df['date'])
df.tail(12)

Unnamed: 0,date,temperature
25764,2023-12-10 12:00:00,
25765,2023-12-10 13:00:00,
25766,2023-12-10 14:00:00,
25767,2023-12-10 15:00:00,
25768,2023-12-10 16:00:00,
25769,2023-12-10 17:00:00,
25770,2023-12-10 18:00:00,
25771,2023-12-10 19:00:00,
25772,2023-12-10 20:00:00,
25773,2023-12-10 21:00:00,


In [18]:
df.shape

(25776, 2)

**Save data into Postgres database**

In [24]:
import os
import psycopg2 #PosgreSQL adapter for Python
from dotenv import load_dotenv #loads .env file = connects with the database


#set up connection with the database
url=os.getenv("DATABASE_URL")
connection=psycopg2.connect(url)

#create a table 'rooms' if it doesn't exist with 2 columns 
CREATE_ROOMS_TABLE=(
    """CREATE TABLE IF NOT EXISTS data (
        id SERIAL PRIMARY KEY, 
        name TEXT
    );"""
)


""" INSERT_ROOM_RETURN_ID="INSERT INTO rooms(name) VALUES (%s) RETURNING id;"

INSERT_TEMP="INSERT INTO temperatures(room_id, temperature, date) VALUES (%s, %s, %s);" """




' INSERT_ROOM_RETURN_ID="INSERT INTO rooms(name) VALUES (%s) RETURNING id;"\n\nINSERT_TEMP="INSERT INTO temperatures(room_id, temperature, date) VALUES (%s, %s, %s);" '

**Stage II** 

**Get data from the database and train predictive model on it**