In [15]:
import io
import requests
import pandas as pd
from utils import Meteomanz



# Fetch one meteomanz.com page and print the first HTML table it contains.
m = Meteomanz()
# Query window per the URL parameters: 08/02/2006 00Z through 08/02/2006 23Z.
url = "http://www.meteomanz.com/sy1?ty=hp&l=1&cou=2060&ind=00000&d1=08&m1=02&y1=2006&h1=00Z&d2=08&m2=02&y2=2006&h2=23Z&so=001&np=1"

# Send a GET request using the headers supplied by the Meteomanz helper; the
# timeout keeps the cell from hanging forever on an unresponsive server.
response = requests.get(url=url, headers=m.header(), timeout=20)

# Only try to parse the body when the server answered 200 OK.
if response.status_code == 200:
    # Decode leniently (undecodable bytes are dropped) and wrap the text in a
    # file-like object to hand to pd.read_html.
    html_content = response.content.decode('utf-8', errors='ignore')
    html_io = io.StringIO(html_content)

    # pd.read_html raises ValueError instead of returning an empty list when
    # the document has no <table>; map that to an empty result so the
    # "no tables" message below is actually reachable.
    try:
        tables = pd.read_html(html_io)
    except ValueError:
        tables = []

    if tables:
        df = tables[0]  # Work with the first table found on the page
        print(df)
    else:
        print("No tables found on the page.")
else:
    print("Failed to fetch the HTML content. Status code:", response.status_code)

    Station ∆        Date UTC time  Temp. (ºC) Rel. Hum. (%)  \
0      ABADAN  08/02/2006      21Z        20.0           81%   
1      ABADAN  08/02/2006      18Z        20.2           81%   
2      ABADAN  08/02/2006      15Z        21.6           75%   
3      ABADAN  08/02/2006      12Z        24.4           49%   
4      ABADAN  08/02/2006      09Z        21.4           70%   
..        ...         ...      ...         ...           ...   
295   KONARAK  08/02/2006      06Z        23.6           61%   
296     MAKKO  08/02/2006      18Z        -3.4           94%   
297     MAKKO  08/02/2006      15Z        -2.4           84%   
298     MAKKO  08/02/2006      12Z         NaN            0%   
299     MAKKO  08/02/2006      09Z        -0.6           77%   

    Pressure/ Geopot.   Wind dir Wins speed (Km/h) Clouds Low clouds  \
0                   -  330º (NW)              18.0    8/8          -   
1                   -  090º (E )              14.4    6/8          -   
2              

In [7]:
import os
import pandas as pd
import re
import pickle

Units — Temp: ºC, Precip: mm, Pressure: hPa, Wind Speed: km/h, Sunshine: hours

In [None]:
# Load every raw daily file from `raw_data_folder` into one DataFrame, give the
# Spanish column headers English names, parse the dates and sort by station/date.
raw_data_folder = "output/day"

# Keep only regular, non-hidden files: os.listdir also returns sub-directories
# and dot-files (e.g. ".ipynb_checkpoints"), which pd.read_csv cannot read.
# Sorting makes the load order deterministic.
list_csv_files = sorted(
    namefile
    for namefile in os.listdir(path=raw_data_folder)
    if not namefile.startswith(".")
    and os.path.isfile(os.path.join(raw_data_folder, namefile))
)

if not list_csv_files:
    # Fail here with a clear message instead of a KeyError on "Date" further down.
    raise FileNotFoundError(f"No data files found in {raw_data_folder!r}")

# Read all files first and concatenate once; calling pd.concat inside the loop
# re-copies the accumulated frame on every iteration (quadratic cost).
# "-" and "Ip" are treated as missing values
# (NOTE(review): "Ip" is presumably the site's trace-precipitation marker — confirm).
frames = [
    pd.read_csv(
        filepath_or_buffer=f"{raw_data_folder}/{namefile}",
        na_values=["-", "Ip"],
    )
    for namefile in list_csv_files
]
data = pd.concat(objs=frames)

# Map the raw (Spanish) column headers to the English names used downstream.
data = data.rename(
    columns={
        'Estación ∆' : "Station_Name",
        'Fecha' : "Date",
        'T. med. (ºC)' : "Tmean",
        'T. max (ºC)' : "Tmax",
        'T. min (ºC)' : "Tmin",
        'Prec. (mm)' : "Precip",
        'Presión/ Geopot.' : "Presure",
        'Dir. vi.' : "Wind_Dir",
        'Vel. vi. (Km/h)' : "Wind_Speed",
        'Nub.' : "Cloud_Condition",
        'Prof. nieve (cm)' : "Prof",
        'Insolac. (horas)' : "Sun_Shine",
    }
)

# Dates arrive as day/month/year strings.
data["Date"] = pd.to_datetime(data["Date"], format="%d/%m/%Y")

# Order chronologically within each station and rebuild a clean 0..n-1 index
# (the concatenated frame carries each file's own row numbers).
data = data.sort_values(["Station_Name", "Date"]).reset_index(drop=True)

def rep(x):
    """Strip unit text from one raw pressure/geopotential cell.

    For string input, the " Hpa" suffix is removed; if the remaining text
    contains an "m", everything from the first "m" to the end of the line is
    dropped as well, so a value such as "1473 m (850 Hpa)" becomes "1473".
    NOTE(review): that case looks like a geopotential height in metres being
    kept in the pressure column — confirm this mixing of units is intended.

    Args:
        x: A raw cell value; normally a string, or NaN/None for missing data.

    Returns:
        The cleaned, whitespace-stripped string for string input; any
        non-string value is returned unchanged.
    """
    # Explicit type guard instead of a bare `except: pass`, which would also
    # hide unrelated errors (and KeyboardInterrupt) while cleaning.
    if not isinstance(x, str):
        return x

    x = x.replace(" Hpa", "").strip()
    if "m" in x:
        x = re.sub(r"m(.*)", "", x).strip()
    return x


# Pressure: strip the unit text with `rep`, then convert each cell to float.
pressure_text = data["Presure"].apply(rep)
data["Presure"] = pressure_text.apply(float)


def _wind_degrees(cell):
    """Return the leading degree value of a wind-direction cell as an int.

    Everything from the "º" sign onwards is discarded; missing values are
    passed through unchanged.
    """
    if pd.isnull(cell):
        return cell
    return int(re.sub(r"º(.*)", "", cell).strip())


# Wind direction: keep only the numeric bearing.
data["Wind_Dir"] = data["Wind_Dir"].apply(_wind_degrees)

# These two columns are not carried into the exported data set.
data = data.drop(columns=["Cloud_Condition", "Prof"])

# Persist the cleaned daily table, then show it as the cell's output.
data.to_csv(path_or_buf="./output/daily.csv", header=True, index=False)
data

In [None]:
# Serialize the `data` frame to "daily.pkl" in the current working directory.
with open("daily.pkl", "wb") as pickle_out:
    pickle.dump(data, pickle_out)
    

In [28]:
# Reload the pickled frame.
# NOTE: pickle.load can execute arbitrary code — only open files this notebook wrote.
with open("daily.pkl", "rb") as pickle_in:
    data = pickle.load(pickle_in)

# Keep only the MASHHAD station rows and renumber them from 0.
is_mashhad = data["Station_Name"] == "MASHHAD"
data_mashhad = data[is_mashhad].reset_index(drop=True)

In [30]:
# Restrict to the inclusive date window 1951-01-01 .. 2023-12-31.
# NOTE(review): the lower bound (1951) does not match the "2000_2023" in the
# output filename below — confirm which one is intended.
in_window = data_mashhad["Date"].between("1951-01-01", "2023-12-31")
data_mashhad = data_mashhad[in_window]

# Export the filtered station data without the index column.
data_mashhad.to_csv(path_or_buf="./output/mashhad_2000_2023.csv", header=True, index=False)




In [31]:
# Show the filtered MASHHAD frame as the cell's output (bare last expression).
data_mashhad

Unnamed: 0,Station_Name,Date,Tmean,Tmax,Tmin,Precip,Presure,Wind_Dir,Wind_Speed,Sun_Shine
0,MASHHAD,2000-01-01,,,4.6,,,,,8.7
1,MASHHAD,2000-01-02,14.8,19.2,10.4,0.0,1017.2,59.0,5.0,8.6
2,MASHHAD,2000-01-03,10.8,19.2,2.4,0.0,1013.3,79.0,9.0,7.0
3,MASHHAD,2000-01-04,10.6,18.0,3.2,0.0,1012.2,253.0,7.0,1.6
4,MASHHAD,2000-01-05,10.6,13.0,8.2,0.0,1014.1,250.0,5.0,0.8
...,...,...,...,...,...,...,...,...,...,...
8749,MASHHAD,2023-12-27,9.9,17.2,2.7,0.0,1026.3,117.0,9.0,
8750,MASHHAD,2023-12-28,11.0,20.8,1.2,0.0,1016.8,311.0,8.0,
8751,MASHHAD,2023-12-29,2.6,3.6,1.6,,1030.8,139.0,21.0,
8752,MASHHAD,2023-12-30,6.2,11.9,0.4,0.0,1028.6,153.0,8.0,
