# Обработка фактических данных после парсинга
## Обзор данных

In [1]:
from datetime import datetime

import pandas as pd

import scripts.pars as pars

In [4]:
# Obtain the raw payload and turn it into a DataFrame using the project's
# `scripts.pars` helpers.
# NOTE(review): presumably `get_data` downloads from `pars.url` and stores the
# response at `pars.file_path` — confirm against scripts/pars.py.
data = pars.get_data(pars.url, pars.file_path)
df = pars.create_dataframe_from_json(pars.file_path, data)
# Preview the first three rows of the parsed table.
df.head(3)

Unnamed: 0,time,temperature_2m,relativehumidity_2m,dewpoint_2m,weathercode,surface_pressure,visibility,windspeed_10m
0,2023-05-29T00:00,8.9,83,6.2,0,1000.6,24140.0,4.3
1,2023-05-29T01:00,8.8,83,6.0,0,1001.2,24140.0,4.3
2,2023-05-29T02:00,9.0,82,6.1,0,1001.5,24140.0,4.0


In [5]:
# Map the parsed column names to the human-readable headers used from here on.
column_names = {
    'time': 'Date/Time',
    'temperature_2m': 'Temperature (C)',
    'dewpoint_2m': 'Dew Point (C)',
    'relativehumidity_2m': 'Humidity',
    'windspeed_10m': 'Wind Speed (km/h)',
    'surface_pressure': 'Pressure (hPa)',
    'weathercode': 'Weather',
}
df = df.rename(columns=column_names)

# Report the dtype of every column and the number of missing values per column.
dtypes_report = f'Типы данных столбцов:\n {df.dtypes}\n'
missing_report = f'Количество пропущенных значений:\n {df.isnull().sum()}'
print(dtypes_report)
print(missing_report)

Типы данных столбцов:
 Date/Time             object
Temperature (C)      float64
Humidity               int64
Dew Point (C)        float64
Weather                int64
Pressure (hPa)       float64
visibility           float64
Wind Speed (km/h)    float64
dtype: object

Количество пропущенных значений:
 Date/Time            0
Temperature (C)      0
Humidity             0
Dew Point (C)        0
Weather              0
Pressure (hPa)       0
visibility           0
Wind Speed (km/h)    0
dtype: int64


Переводим гектопаскали (hPa) в килопаскали (kPa) и округляем значение до двух знаков после запятой.

`1 hPa = 0.1 kPa`

In [6]:
# Convert pressure from hPa to kPa (divide by 10) and round to two decimals,
# then swap the hPa column for the new kPa column (appended as the last column).
pressure_kpa = (df['Pressure (hPa)'] / 10).round(2)
df = df.drop(columns=['Pressure (hPa)'])
df['Pressure (kPa)'] = pressure_kpa

Переводим `visibility` из метров в километры

In [7]:
# Convert visibility from metres to kilometres (divide by 1000), then swap the
# metre column for the new kilometre column (appended as the last column).
visibility_km = df['visibility'] / 1000
df = df.drop(columns=['visibility'])
df['Visibility (km)'] = visibility_km

In [8]:
# Arrange the columns in their final order for the exported table.
column_order = [
    'Date/Time',
    'Temperature (C)',
    'Dew Point (C)',
    'Humidity',
    'Wind Speed (km/h)',
    'Visibility (km)',
    'Pressure (kPa)',
    'Weather',
]
df = df[column_order]

Преобразуем столбец `Date/Time` в формат `datetime`

In [9]:
# Parse the ISO-like timestamps ("YYYY-MM-DDTHH:MM") into datetime64 values.
# `pd.to_datetime` converts the whole column at once instead of calling
# `datetime.strptime` per row, and — unlike the per-row version — it does not
# fail if this cell is re-run after the column has already been converted.
df["Date/Time"] = pd.to_datetime(df["Date/Time"], format="%Y-%m-%dT%H:%M")

Сохраняем полученную таблицу в формате `.csv`

In [10]:
# Write the processed table to a semicolon-separated CSV without the index
# column, then preview the first five rows.
output_path = 'data/parsing_data.csv'
df.to_csv(output_path, sep=';', index=False)
df.head(5)

Unnamed: 0,Date/Time,Temperature (C),Dew Point (C),Humidity,Wind Speed (km/h),Visibility (km),Pressure (kPa),Weather
0,2023-05-29 00:00:00,8.9,6.2,83,4.3,24.14,100.06,0
1,2023-05-29 01:00:00,8.8,6.0,83,4.3,24.14,100.12,0
2,2023-05-29 02:00:00,9.0,6.1,82,4.0,24.14,100.15,0
3,2023-05-29 03:00:00,10.0,5.6,74,4.0,24.14,100.15,0
4,2023-05-29 04:00:00,12.0,5.8,66,5.1,24.14,100.2,0


Источник данных: [Open-Meteo API](https://open-meteo.com/)