In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file
# found, so the available datasets are visible in the cell output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # Join the directory with the bare file name to get a usable path.
        print(os.path.join(dirname, filename))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Directory holding the solar-power CSVs; each plant has one weather-sensor
# file and one generation file, loaded into its own DataFrame.
data_dir = '../input/solar-power-generation-data/'
plant1_sensor = pd.read_csv(data_dir + 'Plant_1_Weather_Sensor_Data.csv')
plant1_generation = pd.read_csv(data_dir + 'Plant_1_Generation_Data.csv')
plant2_sensor = pd.read_csv(data_dir + 'Plant_2_Weather_Sensor_Data.csv')
plant2_generation = pd.read_csv(data_dir + 'Plant_2_Generation_Data.csv')

In [None]:
# First three generation rows of each plant, Plant 1 followed by Plant 2.
print(
    plant1_generation.head(3),
    plant2_generation.head(3),
    sep='\n',
)

In [None]:
# Leading sensor rows of each plant, Plant 1 followed by Plant 2.
print(
    plant1_sensor.head(),
    plant2_sensor.head(),
    sep='\n',
)

In [None]:
# Summary statistics of the sensor frames, Plant 1 followed by Plant 2.
print(
    plant1_sensor.describe(),
    plant2_sensor.describe(),
    sep='\n',
)

In [None]:
# Summary statistics of the generation frames, Plant 1 followed by Plant 2.
print(
    plant1_generation.describe(),
    plant2_generation.describe(),
    sep='\n',
)

In [None]:
def prep_time_series(data, data_cols=None):
    """Resample generation readings into weekly and daily means.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``DATE_TIME`` column and every column listed in
        ``data_cols``. ``DATE_TIME`` is parsed with ``'%d-%m-%Y %H:%M'`` first
        and ``'%Y-%m-%d %H:%M:%S'`` as a fallback.
        NOTE: the parsed datetimes are written back to ``data['DATE_TIME']``,
        so the caller's frame is modified in place.
    data_cols : list of str, optional
        Columns to aggregate. Defaults to
        ``['DC_POWER', 'AC_POWER', 'DAILY_YIELD', 'TOTAL_YIELD']``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(weekly, daily)`` means of ``data_cols``, each indexed by the
        resampled ``DATE_TIME``.

    Raises
    ------
    KeyError
        If ``DATE_TIME`` or one of ``data_cols`` is missing from ``data``.
    ValueError
        If ``DATE_TIME`` matches neither supported format.
    """
    if data_cols is None:
        data_cols = ['DC_POWER', 'AC_POWER', 'DAILY_YIELD', 'TOTAL_YIELD']

    try:
        data['DATE_TIME'] = pd.to_datetime(data['DATE_TIME'], format='%d-%m-%Y %H:%M')
    except (ValueError, TypeError):
        # Only a parse failure should trigger the second format; a bare
        # ``except`` would also hide unrelated errors and KeyboardInterrupt.
        data['DATE_TIME'] = pd.to_datetime(data['DATE_TIME'], format='%Y-%m-%d %H:%M:%S')

    time_series_data = data.set_index('DATE_TIME')[data_cols]
    time_series_data_weekly = time_series_data.resample('W').mean()
    # 'D' yields the same midnight-anchored one-day bins as '24H' without
    # relying on the uppercase 'H' alias that newer pandas releases deprecate.
    time_series_data_daily = time_series_data.resample('D').mean()
    return time_series_data_weekly, time_series_data_daily


In [None]:
# Weekly and daily generation means for each plant.
(plant1_generation_weekly,
 plant1_generation_daily) = prep_time_series(plant1_generation)
(plant2_generation_weekly,
 plant2_generation_daily) = prep_time_series(plant2_generation)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Overlay the weekly (dashed) and daily (solid) AC power series of both plants
# on a single wide figure; Plant 2 lines carry circle markers.
sns.set(rc={'figure.figsize': (20, 8)})
fig, ax = plt.subplots()
ax.plot(
    plant1_generation_weekly['AC_POWER'],
    label='AC_POWER_WEEKLY-PLANT-1', ls='--', lw=2, marker='',
)
ax.plot(
    plant2_generation_weekly['AC_POWER'],
    label='AC_POWER_WEEKLY-PLANT-2', ls='--', lw=2, marker='o',
)
ax.plot(
    plant1_generation_daily['AC_POWER'],
    label='AC_POWER_DAILY-PLANT-1', ls='-', lw=2, marker='',
)
ax.plot(
    plant2_generation_daily['AC_POWER'],
    label='AC_POWER_DAILY-PLANT-2', ls='-', lw=2, marker='o',
)

ax.set_ylabel('Solar Power Generation (kW)')
ax.legend()
    

In [None]:
def prep_time_series_sensor(data, data_cols=None):
    """Resample weather-sensor readings into weekly and daily means.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``DATE_TIME`` column and every column listed in
        ``data_cols``. ``DATE_TIME`` is parsed with ``'%d-%m-%Y %H:%M'`` first
        and ``'%Y-%m-%d %H:%M:%S'`` as a fallback.
        NOTE: the parsed datetimes are written back to ``data['DATE_TIME']``,
        so the caller's frame is modified in place.
    data_cols : list of str, optional
        Columns to aggregate. Defaults to
        ``['AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE', 'IRRADIATION']``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(weekly, daily)`` means of ``data_cols``, each indexed by the
        resampled ``DATE_TIME``.

    Raises
    ------
    KeyError
        If ``DATE_TIME`` or one of ``data_cols`` is missing from ``data``.
    ValueError
        If ``DATE_TIME`` matches neither supported format.
    """
    if data_cols is None:
        data_cols = ['AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE', 'IRRADIATION']

    try:
        data['DATE_TIME'] = pd.to_datetime(data['DATE_TIME'], format='%d-%m-%Y %H:%M')
    except (ValueError, TypeError):
        # Only a parse failure should trigger the second format; a bare
        # ``except`` would also hide unrelated errors and KeyboardInterrupt.
        data['DATE_TIME'] = pd.to_datetime(data['DATE_TIME'], format='%Y-%m-%d %H:%M:%S')

    time_series_data = data.set_index('DATE_TIME')[data_cols]
    time_series_data_weekly = time_series_data.resample('W').mean()
    # 'D' yields the same midnight-anchored one-day bins as '24H' without
    # relying on the uppercase 'H' alias that newer pandas releases deprecate.
    time_series_data_daily = time_series_data.resample('D').mean()
    return time_series_data_weekly, time_series_data_daily


In [None]:
# Weekly and daily sensor means for each plant.
(plant1_sensor_weekly,
 plant1_sensor_daily) = prep_time_series_sensor(plant1_sensor)
(plant2_sensor_weekly,
 plant2_sensor_daily) = prep_time_series_sensor(plant2_sensor)

In [None]:
# Overlay the weekly (dashed) and daily (solid) ambient temperature series of
# both plants on a single wide figure; Plant 2 lines carry circle markers.
sns.set(rc={'figure.figsize': (20, 8)})
fig, ax = plt.subplots()
ax.plot(
    plant1_sensor_weekly['AMBIENT_TEMPERATURE'],
    label='AMBIENT_TEMPERATURE_WEEKLY-PLANT-1', ls='--', lw=2, marker='',
)
ax.plot(
    plant2_sensor_weekly['AMBIENT_TEMPERATURE'],
    label='AMBIENT_TEMPERATURE_WEEKLY-PLANT-2', ls='--', lw=2, marker='o',
)
ax.plot(
    plant1_sensor_daily['AMBIENT_TEMPERATURE'],
    label='AMBIENT_TEMPERATURE_DAILY-PLANT-1', ls='-', lw=2, marker='',
)
ax.plot(
    plant2_sensor_daily['AMBIENT_TEMPERATURE'],
    label='AMBIENT_TEMPERATURE_DAILY-PLANT-2', ls='-', lw=2, marker='o',
)

ax.set_ylabel('Ambient Temperature (C)')
ax.legend()
    

In [None]:
# Weekly ambient-minus-module temperature for each plant.
sns.set(rc={'figure.figsize':(20, 8)})
fig, ax = plt.subplots()
ax.plot(plant1_sensor_weekly['AMBIENT_TEMPERATURE'] - plant1_sensor_weekly['MODULE_TEMPERATURE'], marker='o',linewidth=2, linestyle='--',label='TEMPERATURE DIFFERENCE WEEKLY PLANT-1')
ax.plot(plant2_sensor_weekly['AMBIENT_TEMPERATURE'] - plant2_sensor_weekly['MODULE_TEMPERATURE'], marker='o',linewidth=2, linestyle='--',label='TEMPERATURE DIFFERENCE WEEKLY PLANT-2')

# The plotted quantity is a difference of two temperatures, not the ambient
# temperature itself, so the axis label names the difference explicitly.
ax.set_ylabel('Ambient - Module Temperature (C)')
ax.legend()
    

In [None]:
# Five-bin irradiation histograms for both plants drawn on the same axes.
sns.set(rc={'figure.figsize': (20, 8)})
fig, ax = plt.subplots()
n, bins, patches = ax.hist(
    plant1_sensor['IRRADIATION'],
    bins=5, rwidth=0.5, alpha=0.7, color='purple', label='Plant 1',
)
# The second call rebinds n, bins and patches to the Plant 2 results.
n, bins, patches = ax.hist(
    plant2_sensor['IRRADIATION'],
    bins=5, rwidth=0.5, alpha=0.7, color='pink', label='Plant 2',
)
ax.legend()
ax.grid(axis='y', alpha=0.75)
ax.set_xlabel('IRRADIATION')
ax.set_ylabel('Frequency')


In [None]:
# Join Plant 1's daily sensor means with its daily generation means, using
# the two frames' index values as the merge keys.
plant1_data = plant1_sensor_daily.merge(
    plant1_generation_daily,
    left_on=plant1_sensor_daily.index,
    right_on=plant1_generation_daily.index,
)

In [None]:
# Join Plant 2's daily sensor means with its daily generation means, using
# the two frames' index values as the merge keys.
plant2_data = plant2_sensor_daily.merge(
    plant2_generation_daily,
    left_on=plant2_sensor_daily.index,
    right_on=plant2_generation_daily.index,
)

In [None]:
# Daily AC power (x) against daily irradiation (y), one colour per plant.
sns.set(rc={'figure.figsize': (20, 8)})
fig, ax = plt.subplots()
ax.scatter(
    x=plant1_data['AC_POWER'], y=plant1_data['IRRADIATION'],
    c='red', label='Plant 1',
)
ax.scatter(
    x=plant2_data['AC_POWER'], y=plant2_data['IRRADIATION'],
    c='purple', label='Plant 2',
)
ax.set_xlabel('AC Power')
ax.set_ylabel('Irradiation')
ax.legend()
plt.show()

In [None]:
# Daily DC power (x) against daily irradiation (y), one colour per plant.
sns.set(rc={'figure.figsize': (20, 8)})
fig, ax = plt.subplots()
ax.scatter(
    x=plant1_data['DC_POWER'], y=plant1_data['IRRADIATION'],
    c='blue', label='Plant 1',
)
ax.scatter(
    x=plant2_data['DC_POWER'], y=plant2_data['IRRADIATION'],
    c='green', label='Plant 2',
)
ax.grid(axis='y', alpha=0.75)
ax.legend()
ax.set_xlabel('DC Power')
ax.set_ylabel('Irradiation')

In [None]:
# Daily yield (x) against daily ambient temperature (y), one colour per plant.
sns.set(rc={'figure.figsize':(20, 8)})
fig, ax = plt.subplots()
ax.scatter(plant1_data['DAILY_YIELD'], plant1_data['AMBIENT_TEMPERATURE'], c='orange',label='Plant 1')
ax.scatter(plant2_data['DAILY_YIELD'], plant2_data['AMBIENT_TEMPERATURE'], c='violet', label='Plant 2')
ax.legend()
ax.grid(axis='y', alpha=0.75)
plt.xlabel('Yield')
# The y-axis carries AMBIENT_TEMPERATURE, so it must not be labelled as
# irradiation; the wording matches the ambient-temperature line plot.
plt.ylabel('Ambient Temperature (C)')