In [16]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Load the training data
train_df = pd.read_csv('train.csv')
train_df['Tarih'] = pd.to_datetime(train_df['Tarih'], format='%d.%m.%Y %H:%M')
train_df['month'] = train_df['Tarih'].dt.month
train_df['day'] = train_df['Tarih'].dt.day
train_df['hour'] = train_df['Tarih'].dt.hour
train_df = train_df.drop(['Tarih'], axis=1)

# Load the MED data
med_df = pd.read_csv('med.csv')
med_df['Tarih'] = pd.to_datetime(med_df['Tarih'], format='%Y-%m-%d')

# Set a flag column to indicate MED days
train_df['med_flag'] = np.where(train_df['day'].isin(med_df['Tarih'].dt.day.unique()), 1, 0)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(train_df.drop(['Energy'], axis=1), train_df['Energy'], test_size=0.2, random_state=42)

# Train a random forest regressor on the training data
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Make predictions on the validation data
y_pred = rf.predict(X_val)

# Calculate the mean absolute percentage error (MAPE)
mape = np.mean(np.abs((y_val - y_pred) / y_val)) * 100
print(f"Validation MAPE: {mape:.2f}%")

# Load the sample submission data
sub_df = pd.read_csv('sample_submission.csv')
sub_df['Tarih'] = pd.to_datetime(sub_df['Tarih'], format='%d.%m.%Y %H:%M')
sub_df['month'] = sub_df['Tarih'].dt.month
sub_df['day'] = sub_df['Tarih'].dt.day
sub_df['hour'] = sub_df['Tarih'].dt.hour
sub_df = sub_df.drop(['Energy'], axis=1)

# Add a flag column to indicate MED days
sub_df['med_flag'] = np.where(sub_df['day'].isin(med_df['Tarih'].dt.day.unique()), 1, 0)

# Make predictions on the sample submission data
sub_df['Energy'] = rf.predict(sub_df.drop(['Tarih'], axis=1))

# Save the predictions to a CSV file
sub_df.to_csv('my_submission.csv', index=False)


Validation MAPE: 9.58%
