In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
for folder, _, files in os.walk('/kaggle/input'):
    for name in files:
        full_path = os.path.join(folder, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from pandas.plotting import autocorrelation_plot
from statsmodels.tsa.arima_model import ARIMA

In [None]:
# Load the Plant 1 generation readings and ask pandas to parse DATE_TIME as datetimes.
# NOTE(review): the timestamp format is inferred by pandas here; if the CSV stores
# day-first dates, verify the parsed values (or pass an explicit format) -- TODO confirm.
plant1_generation_data_df = pd.read_csv(
    '/kaggle/input/solar-power-generation-data/Plant_1_Generation_Data.csv',
    parse_dates=['DATE_TIME'],
)

# Quick look at the rows of a single SOURCE_KEY that report a positive DAILY_YIELD.
x = plant1_generation_data_df.loc[
    (plant1_generation_data_df['SOURCE_KEY'] == 'uHbuxQJl8lW7ozc')
    & (plant1_generation_data_df['DAILY_YIELD'] > 0),
    :,
]
print(x.head())

# Calendar date of every reading, taken with the vectorized .dt accessor rather than
# a Python-level loop over each timestamp.
plant1_generation_data_df['DATE'] = plant1_generation_data_df['DATE_TIME'].dt.date

## Get the Daily AC/DC Power

In [None]:
# Copy of the generation frame without the DATE_TIME column; the DATE column stays.
ac_dc_df = plant1_generation_data_df.drop('DATE_TIME', axis='columns')

In [None]:
# Sum DC_POWER and AC_POWER for each (DATE, PLANT_ID) pair. as_index=False keeps the
# grouping keys as ordinary columns, so the result has a plain integer index.
ac_dc_agg_df = ac_dc_df.groupby(['DATE', 'PLANT_ID'], as_index=False).agg(
    {
        'DC_POWER': 'sum',
        'AC_POWER': 'sum',
    }
)
ac_dc_agg_df

## Get the Daily Yield and Total Yield

In [None]:
# Keep one row per calendar day: the row that sorts last within each DATE.
# Rows are ordered by DATE_TIME first; SOURCE_KEY is a secondary key so that, if
# several rows share the same timestamp, the row kept by tail(1) is reproducible
# rather than depending on how the sort happens to order ties.
# NOTE(review): this keeps a single row per day, not an aggregate over all
# SOURCE_KEY values -- confirm that is the intended definition of the daily yield.
yield_df = (
    plant1_generation_data_df
    .sort_values(['DATE_TIME', 'SOURCE_KEY'])
    .groupby('DATE')
    .tail(1)
)
yield_df

In [None]:
sns.set(rc={'figure.figsize':(30,20)})
fig, axs = plt.subplots(3, 2)


def _plot_daily_series(ax, data, column, label, title):
    """Draw a line chart of `column` against the 'DATE' column of `data` on `ax`.

    Parameters
    ----------
    ax : matplotlib Axes that receives the plot.
    data : DataFrame containing a 'DATE' column and `column`.
    column : name of the column plotted on the y-axis.
    label : legend entry for the line; also used as the y-axis label.
    title : title shown above the panel.
    """
    sns.lineplot(data=data, ax=ax, x='DATE', y=column, label=label)
    ax.set_title(title)
    # The x-axis carries the dates and the y-axis the plotted quantity.
    ax.set_xlabel('Date')
    ax.set_ylabel(label)
    # Limit the number of major ticks on both axes to keep the panel readable.
    ax.xaxis.set_major_locator(plt.MaxNLocator(5))
    ax.yaxis.set_major_locator(plt.MaxNLocator(5))


# Row 1: yield columns from the one-row-per-day frame.
_plot_daily_series(axs[0,0], yield_df, 'TOTAL_YIELD', 'Total Yield',
                   'Solar Power Generation Total Yield')
_plot_daily_series(axs[0,1], yield_df, 'DAILY_YIELD', 'Daily Yield',
                   'Solar Power Generation Daily Yield')

# Row 2: DC / AC power summed per date.
_plot_daily_series(axs[1,0], ac_dc_agg_df, 'DC_POWER', 'DC Power',
                   'Solar Power Generation DC Power')
_plot_daily_series(axs[1,1], ac_dc_agg_df, 'AC_POWER', 'AC Power',
                   'Solar Power Generation AC Power')

# Row 3: correlation matrix of the two aggregated power columns.
sns.heatmap(ac_dc_agg_df.loc[:, ['DC_POWER', 'AC_POWER']].corr(), ax=axs[2,0], annot=True, annot_kws={'size':12})
axs[2,0].set_title('Correlation between DC Power and AC Power')

# The 3x2 grid has six slots but only five are used; hide the empty one.
axs[2,1].set_visible(False)

In [None]:
# Series of the TOTAL_YIELD values from yield_df, indexed by the matching DATE values.
ts = pd.Series(
    index=yield_df['DATE'],
    data=yield_df['TOTAL_YIELD'].values,
)

In [None]:
# Autocorrelation plot of the per-date TOTAL_YIELD series built above.
autocorrelation_plot(series=ts)