# Addressing Data Gaps with Machine Learning
### Temperature Blanket Project

In [1]:
# Load Dependencies
import pandas as pd
import os
from pathlib import Path

## Load and merge local temperature datasets

In [14]:
# Define the raw data directory
raw_data_directory = '../Resources/Raw/'

# Create a list of all csv files in the data directory.
# os.listdir returns names in arbitrary order, so sort them to keep the
# row order of the merged dataset reproducible between runs and machines.
raw_csv_files = sorted(
    file for file in os.listdir(raw_data_directory) if file.endswith('.csv')
)

# Fail early with a message that names the directory; otherwise pd.concat
# raises a generic ValueError about having no objects to concatenate.
if not raw_csv_files:
    raise ValueError(f"No csv files found in: {raw_data_directory}")

# Load all csv files into a dataframe, then append them to a list
dataframes = []

for file in raw_csv_files:
    file_path = os.path.join(raw_data_directory, file)
    # header=0 consumes each file's own header row; names= replaces it so
    # every dataframe shares the same column labels before concatenation.
    df = pd.read_csv(file_path, names=['date', 'temp_c', 'humidity'], header=0)
    dataframes.append(df)

# Merge all dataframes into one dataframe with a fresh 0..n-1 index
merged_df = pd.concat(dataframes, ignore_index=True)

# Write the merged dataframe to a csv file, creating the output directory
# first so the cell also works on a fresh checkout.
output_file = '../Resources/Processed/merged_data.csv'
Path(output_file).parent.mkdir(parents=True, exist_ok=True)
merged_df.to_csv(output_file, index=False)

print("Merged dataset saved to:", output_file)


Merged dataset saved to: ../Resources/Processed/merged_data.csv


In [23]:
# Preview the first five rows of the merged dataframe
merged_df.head(n=5)

Unnamed: 0,date,temp_c,humidity
0,2023-02-28 13:23:00,11.4,57.9
1,2023-02-28 13:24:00,11.4,51.0
2,2023-02-28 13:25:00,10.5,46.3
3,2023-02-28 13:26:00,9.6,48.1
4,2023-02-28 13:27:00,9.0,50.3


In [16]:
# Summarize the merged dataframe: index range, column names, non-null counts,
# dtypes and memory usage.
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 241532 entries, 0 to 241531
Data columns (total 3 columns):
 #   Column    Non-Null Count   Dtype  
---  ------    --------------   -----  
 0   date      241532 non-null  object 
 1   temp_c    241532 non-null  float64
 2   humidity  241532 non-null  float64
dtypes: float64(2), object(1)
memory usage: 5.5+ MB


In [22]:
# Parse the 'date' strings into datetime values, then store them back
# in the same column of the merged dataframe.
parsed_dates = pd.to_datetime(merged_df['date'])
merged_df['date'] = parsed_dates

# Report the resulting dtype to confirm the conversion took effect
date_dtype = merged_df['date'].dtype
print(f'Date column type: {date_dtype}')

Date column type: datetime64[ns]
