In [None]:
!pip install autogluon

In [None]:
import pandas as pd
import numpy as np
from scipy import stats
# from autogluon.tabular import TabularPredictor

In [None]:
import plotly.graph_objs as go

def showPredictionsOnGraph(train,sub_df,allDates,y_pred):
  """Scatter-plot the training targets and the predictions on one time axis.

  Args:
    train: training frame; the target is read from "d_enerji", or from "y"
      when the frame has already been renamed for Prophet.
    sub_df: submission frame; only its row count is used, to decide how many
      trailing entries of `allDates` belong to the predictions.
    allDates: training dates followed by submission dates.
    y_pred: predicted values, one per submission row.
  """
  # Split from the front. Slicing with a negated count (`allDates[:-n]`)
  # returns an empty training part and treats everything as predictions when n == 0.
  split_at = max(len(allDates) - sub_df.shape[0], 0)
  train_dates = allDates[:split_at]
  pred_dates = allDates[split_at:]

  # prepareDatasForProphet renames the target column from "d_enerji" to "y".
  target_col = "d_enerji" if "d_enerji" in train.columns else "y"
  train_values = train[target_col]

  trace_train = go.Scatter(
      x=train_dates,
      y=train_values,
      mode='markers',
      name='Train',
      marker={"size": 5, "color": 'blue'},
  )
  trace_pred = go.Scatter(
      x=pred_dates,
      y=y_pred,
      mode='markers',
      name='Prediction',
      marker={"size": 5, "color": 'red'},
  )
  layout = go.Layout(
      title='Train ve Tahmin Verileri',
      xaxis={"title": 'Tarih'},
      yaxis={"title": 'Değer'},
      hovermode='closest',
  )

  fig = go.Figure(data=[trace_train, trace_pred], layout=layout)
  fig.show()

In [None]:
def train_test_split(train,percent = 0.8):
  """Split `train` by row order into a leading and a trailing part.

  The first `int(percent * n_rows)` rows form the training part and the
  remaining rows the test part; no shuffling is done.

  Returns:
    (X_train, y_train, X_test, y_test), where the X frames are the parts
    without the "d_enerji" column and the y series are that column.
  """
  target = "d_enerji"
  cut = int(percent * train.shape[0])
  head, tail = train.iloc[:cut], train.iloc[cut:]
  return head.drop(columns=target), head[target], tail.drop(columns=target), tail[target]

- GradientBoostingRegressor (6.24233)
- RandomForestRegressor (9.37007)
- XGBoost (5.74 civarı)


In [None]:
def giveGeneralInfo(df):
  """Print the column names, the `df.info()` summary and per-column NaN counts."""
  print("Columns: ", df.columns.tolist())
  print("-------------------")
  # DataFrame.info() writes its report itself and returns None, so wrapping
  # it in print() appended a stray "None" line to the output.
  df.info()
  print("-----------------")
  print(df.isna().sum())

def calculateTimeMean(df):
  """Return the mean of "d_enerji" for each hour label "00:00:00" .. "23:00:00".

  Args:
    df: frame with a "saat" column holding "HH:00:00" strings and a
      "d_enerji" column.

  Returns:
    A list of 24 means in hour order; an hour with no rows yields NaN.
  """
  times = [f"{hour:02d}:00:00" for hour in range(24)]
  # Filter on the frame that was passed in. The mask used to be built from
  # the global `train`, which ignored (or mis-aligned with) the argument.
  return [df.loc[df["saat"] == time, "d_enerji"].mean() for time in times]

def splitDateColumn(df):
  """Expand the "tarih" column into datetime parts, modifying `df` in place.

  Adds "tarihwithhours" (the parsed timestamp), "year", "month", "day",
  "dayofweek" and "hour", then overwrites "tarih" with the date-only string.
  Note that "hour" holds `datetime.time` objects, not integers.

  Returns:
    The same `df` object that was passed in.
  """
  stamps = pd.to_datetime(df["tarih"])
  df["tarihwithhours"] = stamps
  for part in ("year", "month", "day", "dayofweek"):
    df[part] = getattr(stamps.dt, part)
  df["hour"] = stamps.dt.time
  df["tarih"] = stamps.dt.date.astype(str)
  return df

def addWeekendInfo(df):
  """Add an integer "is_weekend" column (1 where "dayofweek" >= 5, else 0) in place.

  Returns:
    The same `df` object that was passed in.
  """
  weekend_mask = df["dayofweek"] >= 5
  df["is_weekend"] = weekend_mask.astype("int64")
  return df


def addInterruptionInfo(df, interruption_dates=None): # 1 if there is a power interruption, otherwise 0
  """Add an integer "interruption" column flagging rows whose "tarih" is an outage date.

  Args:
    df: frame with a "tarih" column; modified in place.
    interruption_dates: list-like of date values to flag. When omitted, the
      module-level `dates` variable is used, as before.

  Returns:
    The same `df` object that was passed in.
  """
  if interruption_dates is None:
    interruption_dates = dates
  # One vectorized membership test. The previous loop rebuilt
  # df["tarih"].tolist() for every date.
  df["interruption"] = df["tarih"].isin(interruption_dates).astype("int64")
  return df

def dropInterruptions(df):  # rows flagged as interruptions (720 per the original note) may be dropped
  """Return a copy of `df` without the rows whose "interruption" flag equals 1."""
  interrupted_labels = df.index[df["interruption"] == 1].values
  return df.drop(index=interrupted_labels)

def seasonParameter(df):
  """Add one-hot season columns derived from "month", modifying `df` in place.

  Months 3-5 set "is_spring", 6-8 "is_summer", 9-11 "is_fall" and
  12, 1, 2 "is_winter"; any other month value leaves all four flags at 0.

  Returns:
    The same `df` object that was passed in.
  """
  months_by_flag = {
      "is_spring": (3, 4, 5),
      "is_summer": (6, 7, 8),
      "is_fall": (9, 10, 11),
      "is_winter": (12, 1, 2),
  }
  # Create all flag columns first so they appear in the frame in this order.
  for flag in months_by_flag:
    df[flag] = 0
  for flag, months in months_by_flag.items():
    df.loc[df["month"].isin(months), flag] = 1
  return df

def balanceInterruption(df):
  """Scale "d_enerji" by 0.95 on interruption rows later than hour 5, in place.

  Args:
    df: frame with "interruption", "hour" and "d_enerji" columns. "hour" may
      hold plain numbers or objects with an `.hour` attribute (such as the
      `datetime.time` values written by splitDateColumn).

  Returns:
    The same `df` object that was passed in.
  """
  # datetime.time cannot be compared with an int, so reduce each entry to
  # its numeric hour first; numeric entries pass through unchanged.
  hour_of_day = df["hour"].map(lambda value: getattr(value, "hour", value))
  affected = (df["interruption"] == 1) & (hour_of_day > 5)
  df.loc[affected, "d_enerji"] *= 0.95
  return df

def allProcess(df):
  """Run the feature-engineering steps on `df` in order and return the result.

  Steps: splitDateColumn, addInterruptionInfo, addWeekendInfo, seasonParameter.
  dropInterruptions and balanceInterruption are deliberately not applied
  (original author's note: another approach is needed), and the "tarih"
  column is kept.
  """
  pipeline = (splitDateColumn, addInterruptionInfo, addWeekendInfo, seasonParameter)
  for step in pipeline:
    df = step(df)
  return df

def TabularPredictor(train_data,sub_df):
  """Fit an AutoGluon tabular model on `train_data` and predict for `sub_df`.

  Args:
    train_data: training frame containing the "d_enerji" label column.
    sub_df: frame of rows to predict.

  Returns:
    The predictions returned by the fitted predictor's `predict`.
  """
  # This wrapper has the same name as AutoGluon's class, so the bare name
  # resolves to this function and the call failed with an unexpected
  # `label` keyword. Import the class locally under an alias instead.
  from autogluon.tabular import TabularPredictor as AutoGluonTabularPredictor

  predictor = AutoGluonTabularPredictor(label='d_enerji').fit(train_data)
  return predictor.predict(sub_df)

def createSubmissionCsv(train, sub_df,y_pred,dates_for_submission):
  """Build the two-column submission frame.

  Args:
    train: unused.
    sub_df: unused.
    y_pred: predicted values, stored under "Dağıtılan Enerji (MWh)".
    dates_for_submission: timestamps, stored under "tarih".

  Returns:
    A new DataFrame with the columns "tarih" and "Dağıtılan Enerji (MWh)".
  """
  submission = pd.DataFrame({"tarih": dates_for_submission})
  # Assigned as a second step so a Series `y_pred` is aligned on the index
  # established by the dates column.
  submission["Dağıtılan Enerji (MWh)"] = y_pred
  return submission

In [None]:
def giveAllDatesWithHours(train,sub):
  """Return the "tarih" values of `train` followed by those of `sub` as one list."""
  combined = train["tarih"].tolist()
  combined.extend(sub["tarih"].tolist())
  return combined

In [None]:
def holidaysProcessing(holidays,allDates):
  """Turn a day / Turkish-month-name / year table into a `ds` + `holiday` frame.

  Args:
    holidays: frame with "day", "month" (Turkish month name, or already a
      number) and "year" columns. It is not modified.
    allDates: unused; kept for call compatibility.

  Returns:
    A new frame where "day", "month" and "year" are replaced by "ds"
    (`datetime.date`) and a constant "holiday" column equal to "tatil".
  """
  month_numbers = {
      "Ocak": 1, "Şubat": 2, "Mart": 3, "Nisan": 4, "Mayıs": 5, "Haziran": 6,
      "Temmuz": 7, "Ağustos": 8, "Eylül": 9, "Ekim": 10, "Kasım": 11, "Aralık": 12,
  }
  # Work on a copy so the caller's frame is left untouched.
  holidays = holidays.copy()
  # Values that are not Turkish month names are passed through as they are.
  holidays["month"] = holidays["month"].map(lambda month: month_numbers.get(month, month))
  day_month_year = holidays["day"].astype(str) + "-" + holidays["month"].astype(str) + "-" + holidays["year"].astype(str)
  # The strings are day-first. Without an explicit format pandas reads
  # e.g. "5-1-2020" month-first (May 1st) or fails on mixed rows.
  holidays["ds"] = pd.to_datetime(day_month_year, format="%d-%m-%Y").dt.date
  holidays = holidays.drop(["day","month","year"], axis = 1)
  holidays["holiday"] = "tatil"
  return holidays

In [None]:
# Load the input CSV files from hard-coded Google Drive paths.
train = pd.read_csv("/content/drive/MyDrive/gdz_elektrik/kaggle/train.csv")
med = pd.read_csv("/content/drive/MyDrive/gdz_elektrik/kaggle/med.csv")
sub = pd.read_csv("/content/drive/MyDrive/gdz_elektrik/kaggle/sample_submission.csv")
holidays = pd.read_csv("/content/drive/MyDrive/gdz_elektrik/kaggle/tatil_gunleri_2018-2022.csv")
# Give train and the sample submission the same column names so both can go
# through the same processing functions.
train.columns = ["tarih","d_enerji"]
sub.columns = ["tarih","d_enerji"]
med.columns = ["tarih"]
# `dates` is read as a module-level global by addInterruptionInfo.
dates = med["tarih"].values
dates_for_submission = sub["tarih"].values
# Collected here, before allProcess overwrites "tarih" with date-only strings.
allDates = giveAllDatesWithHours(train,sub)
# holidays = holidaysProcessing(holidays,allDates) # my own hand-built holiday table; disabled because I found a better holiday dataset.

In [None]:
# Copy first: allProcess adds columns to the frame it receives, and `sub`
# itself should stay as loaded.
sub_test = sub.copy()

In [None]:
# Feature-engineer the training frame and the copy of the submission frame.
train = allProcess(train)
sub_df = allProcess(sub_test)
# NOTE(review): these drops make the cell non-re-runnable; a second run
# raises because "d_enerji" is already gone from `sub`.
sub = sub.drop("d_enerji",axis = 1)
sub_df = sub_df.drop("d_enerji",axis = 1)
# Replace the holidays frame loaded earlier with one built from Calendar.csv.
holidays = pd.read_csv("/content/drive/MyDrive/gdz_elektrik/kaggle/Calendar.csv")
holidays = holidays[['CALENDAR_DATE','RAMADAN_FLAG','PUBLIC_HOLIDAY_FLAG']].rename(columns={'CALENDAR_DATE':'ds'})
# Rows with 'Y' in either flag column are labelled 'TR-Holidays'; every other
# row gets a placeholder value and is removed by the filter below.
holidays['holiday'] = np.where((holidays['RAMADAN_FLAG'] == 'Y') | (holidays['PUBLIC_HOLIDAY_FLAG'] == 'Y'), 'TR-Holidays', 0)
holidays = holidays[['ds','holiday']]
holidays = holidays[holidays['holiday'] == 'TR-Holidays']

In [None]:
# train = balanceInterruption(train)

Date sütunu varken: 4.99726, Date sütunu yokken: 4.99726 — değişen bir şey yok.

In [None]:
def prophet(train,sub_df,holidays):
  """Fit a Prophet model with custom seasonalities and predict the submission rows.

  Args:
    train: training frame with "tarihwithhours", "d_enerji" and the condition
      columns "is_weekend", "is_spring", "is_summer", "is_fall", "is_winter".
    sub_df: frame to predict, with "tarihwithhours" and the same condition
      columns.
    holidays: holidays frame passed to the Prophet constructor.

  Returns:
    The "yhat" column of the forecast.
  """
  from prophet import Prophet # original author's note: 5.54701 without adding any seasonality.

  # rename() returns new frames, so the caller's frames are not modified.
  train_for_model = train.rename(columns = {
      "tarihwithhours":"ds",
      "d_enerji":"y"
  })
  sub_for_model = sub_df.rename(columns = {"tarihwithhours":"ds"})

  model = Prophet(holidays = holidays)
  model.add_seasonality(name='weekly', period=7, fourier_order=5, condition_name='is_weekend')
  # Each conditional component needs its own name. Prophet stores
  # seasonalities keyed by name, so registering all of them as 'yearly'
  # left only the last registration active.
  for season in ("spring", "summer", "fall", "winter"):
    model.add_seasonality(name=f'yearly_{season}', period=365.25/4, fourier_order=10, condition_name=f'is_{season}')
  model.add_seasonality(name='monthly', period=30.5, fourier_order=5, mode='additive')
  model.add_seasonality(name='yearly', period=365.25, fourier_order=10, mode='additive')
  # NOTE(review): Prophet periods are expressed in days, so this is a 24-day
  # cycle despite the name; confirm whether a 24-hour cycle was intended.
  model.add_seasonality(name='hourly', period=24, fourier_order=5)

  model.fit(train_for_model)

  forecast = model.predict(sub_for_model)
  return forecast["yhat"]

In [None]:
def prepareDatasForProphet(train,sub_df):
  """Select the modelling columns and rename them to Prophet's `ds` / `y` names.

  Returns:
    (train, sub_df) as new frames: "tarihwithhours" becomes "ds" in both and
    "d_enerji" becomes "y" in train. Columns not listed here are dropped.
  """
  shared_cols = ["year","month","day","dayofweek","hour","interruption","is_weekend","is_spring","is_summer","is_fall","is_winter"]
  prophet_train = train[["tarih","tarihwithhours", *shared_cols, "d_enerji"]].rename(
      columns = {"tarihwithhours":"ds", "d_enerji":"y"}
  )
  prophet_sub = sub_df.rename(columns = {"tarihwithhours":"ds"})[["tarih","ds", *shared_cols]]
  return prophet_train,prophet_sub

In [None]:
def prophetForDailyPrediction(train,daily_sub_for_model):
  """Fit Prophet on `train` and return the full forecast for one batch of rows.

  Args:
    train: frame already in Prophet form ("ds", "y") that also carries the
      condition columns "is_weekend", "is_spring", "is_summer", "is_fall"
      and "is_winter".
    daily_sub_for_model: rows to predict, with "ds" and the same condition
      columns.

  Returns:
    The complete forecast frame from `model.predict` (not just "yhat").

  Note:
    The holidays frame is read from the module-level `holidays` variable.
  """
  from prophet import Prophet

  model = Prophet(holidays = holidays,daily_seasonality = True)
  model.add_seasonality(name='weekly', period=7, fourier_order=5, condition_name='is_weekend')
  # Each conditional component needs its own name. Prophet stores
  # seasonalities keyed by name, so registering all of them as 'yearly'
  # left only the last registration active.
  for season in ("spring", "summer", "fall", "winter"):
    model.add_seasonality(name=f'yearly_{season}', period=365.25/4, fourier_order=10, condition_name=f'is_{season}')
  model.add_seasonality(name='monthly', period=30.5, fourier_order=5, mode='additive')
  model.add_seasonality(name='yearly', period=365.25, fourier_order=10, mode='additive')
  # NOTE(review): Prophet periods are expressed in days, so this is a 24-day
  # cycle despite the name; confirm whether a 24-hour cycle was intended.
  model.add_seasonality(name='hourly', period=24, fourier_order=5)

  model.fit(train)
  forecast = model.predict(daily_sub_for_model)
  return forecast

In [None]:
# Distinct submission dates ("tarih" holds date-only strings after allProcess).
# Only the commented-out day-by-day loop further down refers to this list.
dates_sub_for_model = sub_df["tarih"].unique().tolist()

In [None]:
# Reduce both frames to the modelling columns with Prophet's ds / y names.
# NOTE(review): not re-runnable. After this reassignment `train` no longer
# has "tarihwithhours" / "d_enerji", which prepareDatasForProphet selects.
train,sub_df = prepareDatasForProphet(train,sub_df)

In [None]:
# all_predictions = pd.DataFrame()
# for date in dates_sub_for_model:
#   daily_sub_for_model = sub_df.loc[sub_df["tarih"] == date]
#   forecast = prophetForDailyPrediction(train,daily_sub_for_model)
#   daily_sub_for_model["y"] = forecast["yhat"].values
#   train = train.append(daily_sub_for_model)
#   all_predictions = all_predictions.append(daily_sub_for_model)

In [None]:
# all_predictions.to_csv(f"/content/drive/MyDrive/gdz_elektrik/all_predictions.csv",index=False)

In [None]:
# NOTE(review): `y_pred` is not assigned by any active cell visible here (the
# loop that produced predictions is commented out), so this depends on
# leftover kernel state; confirm where it should come from.
# `train` is the frame returned by prepareDatasForProphet at this point, so
# its target column is named "y".
showPredictionsOnGraph(train,sub_df,allDates,y_pred)

In [None]:
# Pair the original submission timestamps with the predictions.
# NOTE(review): `y_pred` is not assigned by any active cell visible here; confirm its source.
submission_df = createSubmissionCsv(train, sub_df, y_pred,dates_for_submission)

In [None]:
# Display the submission frame as the cell output.
submission_df

Unnamed: 0,tarih,Dağıtılan Enerji (MWh)
0,2022-08-01 00:00:00,2122.141941
1,2022-08-01 01:00:00,1991.007302
2,2022-08-01 02:00:00,1905.319543
3,2022-08-01 03:00:00,1850.069503
4,2022-08-01 04:00:00,1812.249943
...,...,...
739,2022-08-31 19:00:00,2397.645333
740,2022-08-31 20:00:00,2387.193529
741,2022-08-31 21:00:00,2339.876773
742,2022-08-31 22:00:00,2231.970206


In [None]:
# Write the submission to Google Drive under `file_name` (without the row
# index) and print the path that was written.
file_name = "predictionsprophetdailytestv2"
submission_df.to_csv(f"/content/drive/MyDrive/gdz_elektrik/{file_name}.csv",index=False)
print(f"/content/drive/MyDrive/gdz_elektrik/{file_name}.csv kaydedildi.")

/content/drive/MyDrive/gdz_elektrik/predictionsprophetdailytestv2.csv kaydedildi.
