WEB SCRAPING CODE

In [3]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

def get_date(time, timeout=30):
    """Scrape the day labels listed on one monthly history page.

    Args:
        time: Month identifier appended to the ``date=`` query parameter
            (callers in this file pass ``YYYYMM`` as an int or a str). Its
            first four characters are reused as the year suffix.
        timeout: Seconds ``requests`` waits on the server before raising,
            so a stalled connection cannot hang the scrape indefinitely.

    Returns:
        A list of strings made of the first two space-separated words of
        each table's ``colspan="2"`` cell followed by the year, e.g.
        ``'1 October 2018'``. The last collected label is dropped.
    """
    url = "https://ihtiman-meteo.com/wxhistory.php?date=" + str(time)
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')
    dates = []
    for table in soup.find_all('table', attrs={'width': '600'}):
        header = table.find('td', attrs={'colspan': '2'})
        # find() returns None when the table has no such cell; stop at the
        # first table without one (same stopping rule as before, but without
        # a bare ``except`` that would also hide unrelated errors).
        if header is None:
            break
        dates.append(' '.join(header.text.split(' ')[0:2]))
    suffix = ' ' + str(time)[0:4]
    # The final entry is discarded -- presumably it belongs to a non-daily
    # summary table; TODO confirm against the page layout.
    return [label + suffix for label in dates[:-1]]

def get_light(table):
    """Turn the rows of one table into a ``{label: value}`` dictionary.

    The text of every ``tr`` except the first is split at its first digit
    or ``-`` character: the stripped text before that point is the key and
    the stripped remainder is the value. A row without any digit or ``-``
    yields its whole stripped text as the key and ``''`` as the value.

    Args:
        table: Object exposing ``find_all('tr')`` whose items have ``.text``.

    Returns:
        dict mapping row label to row value (later duplicates overwrite).
    """
    # The first row is skipped; only the remaining rows are parsed.
    row_texts = [tr.text for tr in table.find_all('tr')][1:]

    parsed = {}
    for text in row_texts:
        # Default to "no value part" unless a digit or minus sign is found.
        split_at = len(text)
        for pos, char in enumerate(text):
            if char == '-' or char.isdigit():
                split_at = pos
                break
        parsed[text[:split_at].strip()] = text[split_at:].strip()
    return parsed

def get_daily_data(time, timeout=30):
    """Scrape the per-day measurement tables of one monthly history page.

    Args:
        time: Month identifier appended to the ``date=`` query parameter
            (callers in this file pass ``YYYYMM`` as an int or a str).
        timeout: Seconds ``requests`` waits on the server before raising,
            so a stalled connection cannot hang the scrape indefinitely.

    Returns:
        A list with one ``get_light`` dictionary per selected table, in
        page order.
    """
    url = "https://ihtiman-meteo.com/wxhistory.php?date=" + str(time)
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')
    # The last three matching tables are excluded -- presumably they are not
    # daily tables; TODO confirm against the page layout.
    tables = soup.find_all('table', attrs={'width': '600', 'cellpadding': '3'})[:-3]
    return [get_light(table) for table in tables]

def joining_dates_with_data(dates, data):
    """Pair each distinct date with the data entry at the same position.

    Duplicate dates are collapsed to their first occurrence before pairing,
    so the n-th *distinct* date receives ``data[n]``. Extra entries in
    ``data`` are ignored; too few entries raise ``IndexError``.

    Args:
        dates: Iterable of hashable date labels.
        data: Indexable sequence of per-day records.

    Returns:
        dict mapping date label to its record, in first-seen date order.
    """
    # dict.fromkeys keeps insertion order while dropping repeated labels.
    distinct_dates = list(dict.fromkeys(dates))
    return {label: data[pos] for pos, label in enumerate(distinct_dates)}

def get_data(time):
    """Build a DataFrame for one month, indexed by day label.

    Fetches the day labels and the per-day records for ``time``, pairs them
    up, and transposes the result so each day becomes a row and each
    measurement a column.

    Args:
        time: Month identifier forwarded to ``get_date`` and
            ``get_daily_data``.

    Returns:
        pandas.DataFrame with one row per day.
    """
    # Arguments are evaluated left to right, so the dates are fetched first.
    by_day = joining_dates_with_data(get_date(time), get_daily_data(time))
    return pd.DataFrame(by_day).T

# NOTE(review): this two-month concat is overwritten a few lines below and only
# costs extra requests; it is kept because df_202010 / df_202011 may be used
# elsewhere in the notebook -- remove it if they are not.
df_202010 = get_data(202010)
df_202011 = get_data(202011)
result = pd.concat([df_202010, df_202011])

# Scrape December 2018 plus every month of 2019-2023. The monthly frames are
# collected in a list and concatenated once: calling pd.concat inside the loop
# re-copies the whole accumulated frame on every iteration.
years = ['2019', '2020', '2021', '2022', '2023']
months = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']
frames = [get_data(201812)]
for year in years:
    for month in months:
        frames.append(get_data(year + month))
result = pd.concat(frames)
result

# Sanity check of the date scraper on a single month.
d = get_date(201810)
print(d)

# Move the day labels out of the index into a 'Date' column and give every
# column a readable name (one 'Date' column plus 19 measurement columns).
result_reset = result.reset_index()
result_reset.columns = [
    'Date', 'Average temperature', 'Average humidity', 'Average dewpoint',
    'Average barometer', 'Average windspeed', 'Average gustspeed', 'Average direction',
    'Rainfall for month', 'Rainfall for year', 'Maximum rain per minute', 'Maximum temperature',
    'Minimum temperature', 'Maximum humidity', 'Minimum humidity', 'Maximum pressure',
    'Minimum pressure', 'Maximum windspeed', 'Maximum gust speed', 'Maximum heat index'
]

# Persist the raw scrape, then read it back to confirm the round trip.
result_reset.to_csv('raw_weather_data.csv', index=False)
df = pd.read_csv('raw_weather_data.csv')
df



['1 October 2018', '2 October 2018', '3 October 2018', '4 October 2018', '5 October 2018', '6 October 2018', '7 October 2018', '8 October 2018', '9 October 2018', '10 October 2018', '11 October 2018', '12 October 2018', '13 October 2018', '14 October 2018', '15 October 2018', '16 October 2018', '17 October 2018', '18 October 2018', '19 October 2018', '20 October 2018', '21 October 2018', '22 October 2018', '23 October 2018', '24 October 2018', '25 October 2018', '26 October 2018', '27 October 2018', '28 October 2018', '29 October 2018', '30 October 2018', '31 October 2018']


Unnamed: 0,Date,Average temperature,Average humidity,Average dewpoint,Average barometer,Average windspeed,Average gustspeed,Average direction,Rainfall for month,Rainfall for year,Maximum rain per minute,Maximum temperature,Minimum temperature,Maximum humidity,Minimum humidity,Maximum pressure,Minimum pressure,Maximum windspeed,Maximum gust speed,Maximum heat index
0,1 December 2018,-3.1°C,72%,-7.4°C,1019.1 hPa,0.1 m/s,0.2 m/s,204° (SSW),1.2 mm,560.4 mm,1.2 mm on 01 at 14:31,1.3°C on 01 at 15:40,-7.0°C on 01 at 04:24,77% on 01 at 00:00,67% on 01 at 13:02,1021.0 hPa on 01 at 23:50,1017.0 hPa on 01 at 04:49,1.5 m/s on 01 at 15:12,2.1 m/s from 268°(W) on 01 at 15:05,1.3°C on 01 at 15:40
1,2 December 2018,-0.1°C,79%,-3.3°C,1019.4 hPa,0.2 m/s,0.4 m/s,223° (SW),2.4 mm,561.6 mm,1.2 mm on 02 at 11:03,4.7°C on 02 at 15:19,-4.9°C on 02 at 08:05,83% on 02 at 10:16,75% on 02 at 13:05,1021.0 hPa on 02 at 11:36,1018.0 hPa on 02 at 17:21,2.1 m/s on 02 at 18:04,2.6 m/s from 132°(SE) on 02 at 19:06,4.7°C on 02 at 15:19
2,3 December 2018,-1.4°C,86%,-3.4°C,1016.2 hPa,0.0 m/s,0.1 m/s,272° (W),2.4 mm,561.6 mm,0.0 mm on 03 at 23:59,4.7°C on 03 at 16:03,-4.5°C on 03 at 02:52,90% on 03 at 15:39,82% on 03 at 01:33,1019.0 hPa on 03 at 00:50,1014.0 hPa on 03 at 17:51,1.5 m/s on 03 at 13:45,2.1 m/s from 291°(WNW) on 03 at 13:42,4.7°C on 03 at 16:03
3,4 December 2018,0.9°C,88%,-1.0°C,1012.5 hPa,0.0 m/s,0.1 m/s,297° (WNW),2.4 mm,561.6 mm,0.0 mm on 04 at 23:59,5.5°C on 04 at 15:49,-3.7°C on 04 at 04:27,93% on 04 at 14:21,81% on 04 at 23:59,1015.0 hPa on 04 at 00:06,1010.0 hPa on 04 at 14:36,1.5 m/s on 04 at 16:13,2.1 m/s from 329°(NNW) on 04 at 17:41,5.5°C on 04 at 15:49
4,5 December 2018,3.4°C,69%,-2.2°C,1017.5 hPa,2.7 m/s,4.1 m/s,334° (NNW),2.4 mm,561.6 mm,0.0 mm on 05 at 23:59,5.0°C on 05 at 09:22,1.3°C on 05 at 23:59,85% on 05 at 12:13,49% on 05 at 23:45,1022.0 hPa on 05 at 23:59,1013.0 hPa on 05 at 00:20,6.7 m/s on 05 at 16:57,10.8 m/s from 336°(NNW) on 05 at 16:53,5.0°C on 05 at 09:22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1852,27 December 2023,5.2°C,71%,0.2°C,1022.2 hPa,1.0 m/s,1.9 m/s,325° (NW),5.0 mm,402.3 mm,0.0 mm on 27 at 23:59,13.7°C on 27 at 14:45,-0.6°C on 27 at 05:09,83% on 27 at 08:58,54% on 27 at 15:00,1026.2 hPa on 27 at 23:26,1018.2 hPa on 27 at 00:05,5.1 m/s on 27 at 12:33,8.2 m/s from 320°(NW) on 27 at 12:32,13.7°C on 27 at 14:45
1853,28 December 2023,2.7°C,73%,-1.7°C,1025.2 hPa,0.2 m/s,0.5 m/s,1° (N),5.0 mm,402.3 mm,0.0 mm on 28 at 23:59,11.8°C on 28 at 14:59,-2.8°C on 28 at 08:37,83% on 28 at 09:38,60% on 28 at 15:38,1027.4 hPa on 28 at 10:28,1023.0 hPa on 28 at 23:29,1.5 m/s on 28 at 17:40,2.6 m/s from 100°(E) on 28 at 17:41,11.8°C on 28 at 14:59
1854,29 December 2023,2.7°C,71%,-2.3°C,1020.5 hPa,0.6 m/s,1.2 m/s,321° (NW),5.0 mm,402.3 mm,0.0 mm on 29 at 23:59,13.3°C on 29 at 14:47,-3.4°C on 29 at 08:39,84% on 29 at 09:45,48% on 29 at 14:59,1023.2 hPa on 29 at 00:04,1018.4 hPa on 29 at 15:13,4.1 m/s on 29 at 15:40,7.2 m/s from 326°(NW) on 29 at 15:24,13.3°C on 29 at 14:47
1855,30 December 2023,2.0°C,74%,-2.2°C,1019.0 hPa,0.6 m/s,1.1 m/s,313° (NW),5.0 mm,402.3 mm,0.0 mm on 30 at 23:59,9.8°C on 30 at 14:57,-3.2°C on 30 at 08:36,81% on 30 at 11:24,62% on 30 at 15:03,1020.4 hPa on 30 at 10:36,1017.8 hPa on 30 at 15:26,5.7 m/s on 30 at 14:09,9.3 m/s from 319°(NW) on 30 at 13:48,9.8°C on 30 at 14:57
