In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
input_paths = (
    os.path.join(folder, entry)
    for folder, _, entries in os.walk('/kaggle/input')
    for entry in entries
)
for path in input_paths:
    print(path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

Loading the dataframes

In [None]:
# Load the generation and weather-sensor CSV files of both plants into dataframes.
df_pgen1 = pd.read_csv(
    '../input/solar-power-generation-data/Plant_1_Generation_Data.csv'
)
df_pgen2 = pd.read_csv(
    '../input/solar-power-generation-data/Plant_2_Generation_Data.csv'
)
df_psense1 = pd.read_csv(
    '../input/solar-power-generation-data/Plant_1_Weather_Sensor_Data.csv'
)
df_psense2 = pd.read_csv(
    '../input/solar-power-generation-data/Plant_2_Weather_Sensor_Data.csv'
)

Converting date and time to suitable format and columns

In [None]:
# Parse the raw DATE_TIME strings into datetime64 values. The two files use
# different layouts: day-first for the generation data, year-first for the
# sensor data.
# NOTE(review): the sensor format has no seconds field -- confirm it matches the
# CSV exactly, since stricter pandas versions may reject unparsed trailing text.
df_pgen1['DATE_TIME'] = pd.to_datetime(df_pgen1['DATE_TIME'], format='%d-%m-%Y %H:%M')
df_psense1['DATE_TIME'] = pd.to_datetime(df_psense1['DATE_TIME'], format='%Y-%m-%d %H:%M')

# Derive the calendar/clock columns directly from the parsed timestamps with
# the vectorized .dt accessor, instead of per-row lambdas followed by
# re-parsing the intermediate date/time objects.
for df_plant in (df_pgen1, df_psense1):
    stamps = df_plant['DATE_TIME']
    df_plant['DATE'] = stamps.dt.normalize()  # datetime64 at midnight of each day
    df_plant['TIME'] = stamps.dt.time         # datetime.time objects
    df_plant['HOUR'] = stamps.dt.hour
    df_plant['MINUTES'] = stamps.dt.minute

In [None]:
# Preview the first rows of the generation frame, including the derived date/time columns.
df_pgen1.head()

In [None]:
# Preview the first rows of the weather-sensor frame, including the derived date/time columns.
df_psense1.head()

How are DC Power and AC Power related?

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(18, 9))

# Scatter AC_POWER (y) against DC_POWER (x) for every generation reading;
# linestyle='' draws markers only.
ax.plot(df_pgen1['DC_POWER'],
        df_pgen1['AC_POWER'],
        marker='o',
        linestyle='',
        alpha=.5,  # transparency
        ms=3,      # marker size
        label='Correlation Between DC Power & AC Power')
ax.grid()
ax.margins(0.05)
ax.legend()
ax.set_title('Correlation Between DC Power & AC Power')
# Axis labels follow the plotted data: DC_POWER is the first (x) argument
# and AC_POWER the second (y).
ax.set_xlabel('DC_POWER')
ax.set_ylabel('AC_POWER')
plt.show()

DC and AC Power are highly correlated, but this does *not* imply causality. As we heard from the domain expert's talk, both these variables are driven by other variables, notably: irradiation.

How is Irradiation changing as time goes by?

In [None]:
# Marker-only scatter of every sensor reading's IRRADIATION against its HOUR.
_, ax = plt.subplots(figsize=(24, 10))

irradiation_style = {
    'marker': 'o',
    'linestyle': '',
    'alpha': .5,
    'ms': 10,
    'label': 'Irradiation With Time',
}
ax.plot(df_psense1['HOUR'], df_psense1['IRRADIATION'], **irradiation_style)

ax.set_title('Irradiation vs. Time')
ax.set_xlabel('Hour')
ax.set_ylabel('Irradiation')
ax.margins(0.05)
ax.grid()
ax.legend()
plt.show()

As we can see, for almost all of the 34 days, the irradiation increases as one would expect alongside the movement of the sun in the sky.

How is Ambient temperature changing with time each day?

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(18, 9))

# One scatter series per calendar day, each in its own colour. groupby splits
# the sensor frame in a single pass instead of re-filtering it for every date;
# sort=False keeps the days in order of first appearance, so colours and
# legend entries follow the row order of the data.
for date, df_data in df_psense1.groupby('DATE', sort=False):
    ax.plot(df_data['HOUR'],
            df_data['AMBIENT_TEMPERATURE'],
            marker='o',
            linestyle='',
            alpha=.5,
            ms=10,
            label=pd.Timestamp(date).date())  # legend shows the bare calendar date

ax.grid()
ax.margins(0.05)
ax.legend()
ax.set_title('Ambient Temperature vs time in hours for each day')
ax.set_xlabel('HOURS')
ax.set_ylabel('Ambient Temperature')
plt.show()

#the ambient temperature is plotted for each day in a different color, and is plotted against the hour in the day

How is the module temperature changing with time each day?

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(18, 9))

# One scatter series per calendar day, each in its own colour. groupby splits
# the sensor frame in a single pass instead of re-filtering it for every date;
# sort=False keeps the days in order of first appearance, so colours and
# legend entries follow the row order of the data.
for date, df_data in df_psense1.groupby('DATE', sort=False):
    ax.plot(df_data['HOUR'],
            df_data['MODULE_TEMPERATURE'],
            marker='o',
            linestyle='',
            alpha=.5,
            ms=10,
            label=pd.Timestamp(date).date())  # legend shows the bare calendar date

ax.grid()
ax.margins(0.05)
ax.legend()
ax.set_title('Module Temperature vs time in hours for each day')
ax.set_xlabel('HOURS')
ax.set_ylabel('Module Temperature')
plt.show()

#the module temperature is plotted for each day in a different color, and is plotted against the hour in the day

How does Daily yield change with time?

In [None]:
# Marker-only scatter of TOTAL_YIELD against the DATE column of the generation data.
_, ax = plt.subplots(figsize=(24, 10))

yield_style = {
    'marker': 'o',
    'linestyle': '',
    'alpha': .75,
    'ms': 5,
    'label': 'Yield With Date',
}
ax.plot(df_pgen1['DATE'], df_pgen1['TOTAL_YIELD'], **yield_style)

ax.set_title('Total Yield per Day')
ax.set_xlabel('DATE')
ax.set_ylabel('TOTAL_YIELD')
ax.margins(0.025)
ax.grid()
ax.legend()
plt.show()

In the above graph, we can see the total yield (the `TOTAL_YIELD` column) of each inverter plotted against each of the 34 days on the x-axis.
We can see the yield increase as we move from May to June (seasonal change).

How does ambient temperature change with module temperature?

In [None]:
# Scatter module temperature against ambient temperature, one coloured series
# per calendar day found in the sensor data.
dates = df_psense1['DATE'].unique()

_, ax = plt.subplots(figsize=(18, 9))
point_style = {'marker': 'o', 'linestyle': '', 'alpha': .5, 'ms': 10}

for date in dates:
    same_day = df_psense1['DATE'] == date
    df_data = df_psense1.loc[same_day]
    day_label = pd.to_datetime(date, format='%Y-%m-%d').date()

    ax.plot(df_data['AMBIENT_TEMPERATURE'],
            df_data['MODULE_TEMPERATURE'],
            label=day_label,
            **point_style)

ax.set_title('Module Temperature vs. Ambient Temperature')
ax.set_xlabel('Ambient Temperature')
ax.set_ylabel('Module Temperature')
ax.margins(0.05)
ax.grid()
ax.legend()
plt.show()

We notice there is a weak, but generally positive correlation between the two.

How does Module Temperature change with Irradiation?

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(18, 9))

# Scatter MODULE_TEMPERATURE (y) against IRRADIATION (x) for every sensor
# reading; linestyle='' draws markers only.
ax.plot(df_psense1['IRRADIATION'],
        df_psense1['MODULE_TEMPERATURE'],
        marker='o',
        linestyle='',
        alpha=.5,
        ms=10,
        label='module temperature')
ax.grid()
ax.margins(0.05)
ax.legend()
# Title uses the same "y vs. x" order as the notebook's other figures.
ax.set_title('Module Temperature vs. Irradiation')
ax.set_xlabel('Irradiation')
ax.set_ylabel('Module Temperature')
plt.show()

There is a weak but positive correlation between the two. As we understood from the domain expert's explanation, this is because of the way that sunlight works.

How is ambient temperature affected by irradiation?

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(18, 9))

# Scatter AMBIENT_TEMPERATURE (y) against IRRADIATION (x) for every sensor
# reading; linestyle='' draws markers only.
ax.plot(df_psense1['IRRADIATION'],
        df_psense1['AMBIENT_TEMPERATURE'],
        marker='o',
        linestyle='',
        alpha=.5,
        ms=10,
        label='ambient temperature')
ax.grid()
ax.margins(0.05)
ax.legend()
# Title uses the same "y vs. x" order as the notebook's other figures.
ax.set_title('Ambient Temperature vs. Irradiation')
ax.set_xlabel('Irradiation')
ax.set_ylabel('Ambient Temperature')
plt.show()

As expected, the ambient temperature is negligibly correlated with irradiation. This is because, while the irradiation falling on the panel might increase its temperature (module temperature), the heat is frequently not enough to heat up the whole surroundings (ambient temperature).

How does Irradiation affect DC Power generation?

In [None]:
# Left-join the sensor readings onto the generation rows by timestamp, so that
# every generation row is kept.
result_left = df_pgen1.merge(df_psense1, how='left', on='DATE_TIME')

In [None]:
# Marker-only scatter of DC_POWER against IRRADIATION from the merged frame.
_, ax = plt.subplots(figsize=(18, 9))

dc_style = {'marker': 'o', 'linestyle': '', 'alpha': .5, 'ms': 10, 'label': 'DC POWER'}
ax.plot(result_left['IRRADIATION'], result_left['DC_POWER'], **dc_style)

ax.set_title('DC Power vs. Irradiation')
ax.set_xlabel('Irradiation')
ax.set_ylabel('DC Power')
ax.margins(0.05)
ax.grid()
ax.legend()
plt.show()

As we might have expected, the irradiation shows a moderate correlation with DC Power, but it is not very strong. This is mostly because the irradiation is measured by only one sensor covering one panel, while the solar energy is probably generated by many different panels and then sent to the power generators that give us the DC_POWER values.

How does Irradiation affect AC Power?

In [None]:
# Marker-only scatter of AC_POWER against IRRADIATION from the merged frame.
_, ax = plt.subplots(figsize=(18, 9))

ac_style = {'marker': 'o', 'linestyle': '', 'alpha': .5, 'ms': 10, 'label': 'AC POWER'}
ax.plot(result_left['IRRADIATION'], result_left['AC_POWER'], **ac_style)

ax.set_title('AC Power vs. Irradiation')
ax.set_xlabel('Irradiation')
ax.set_ylabel('AC Power')
ax.margins(0.05)
ax.grid()
ax.legend()
plt.show()

The graph for AC power is very similar to DC power, since the DC power is generated and then converted to AC power. (We can think of this graph as a slightly down shifted DC Power graph)