In [1]:
##Determination of Yield, Capacity Factor and Performance Ratio of two photovoltaic PV plants
import pandas as pd
import numpy as np
import matplotlib as plt
import math

In [2]:
# Read the semicolon-separated CSV files of photovoltaic plants 1 and 2.
df1, df2 = (
    pd.read_csv(csv_path, sep=';')
    for csv_path in ("PVPlant_lea1.csv", "PVPlant_lea2.csv")
)

In [3]:
# Convert the 'Data' column of both plant tables from day-month-year text to datetime.
for plant_frame in (df1, df2):
    plant_frame['Data'] = pd.to_datetime(plant_frame['Data'], format='%d-%m-%Y')

In [4]:
# Number of distinct days present in the PV plant 1 dataset.
df1['Data'].nunique()

366

In [5]:
# Number of distinct days present in the PV plant 2 dataset.
df2['Data'].nunique()

366

In [6]:
# Daily production of PV plant 1: the largest 'Produção Hoje(kWh)' reading of each day
# (presumably a running daily total, so its maximum is the day's production — TODO confirm).
# The result replaces df1 with a two-column frame: 'Data' and 'prod_dia'.
# The native groupby max with as_index=False avoids passing the builtin `max` to .agg
# (deprecated in recent pandas) and avoids MultiIndex columns that need manual flattening.
df1 = (
    df1.groupby('Data', as_index=False)['Produção Hoje(kWh)']
    .max()
    .rename(columns={'Produção Hoje(kWh)': 'prod_dia'})
)
df1

Unnamed: 0,Data,prod_dia
0,2020-01-01,6.1
1,2020-01-02,3.1
2,2020-01-03,6.8
3,2020-01-04,6.3
4,2020-01-05,6.0
...,...,...
361,2020-12-27,6.2
362,2020-12-28,6.2
363,2020-12-29,6.3
364,2020-12-30,5.6


In [7]:
# Daily production of PV plant 2: the largest 'Produção Hoje(kWh)' reading of each day
# (presumably a running daily total, so its maximum is the day's production — TODO confirm).
# The result replaces df2 with a two-column frame: 'Data' and 'prod_dia'.
# The native groupby max with as_index=False avoids passing the builtin `max` to .agg
# (deprecated in recent pandas) and avoids MultiIndex columns that need manual flattening.
df2 = (
    df2.groupby('Data', as_index=False)['Produção Hoje(kWh)']
    .max()
    .rename(columns={'Produção Hoje(kWh)': 'prod_dia'})
)
df2

Unnamed: 0,Data,prod_dia
0,2020-01-01,15.8
1,2020-01-02,7.4
2,2020-01-03,19.1
3,2020-01-04,17.1
4,2020-01-05,16.2
...,...,...
361,2020-12-27,18.0
362,2020-12-28,18.2
363,2020-12-29,17.1
364,2020-12-30,14.3


In [8]:
# Add the daily productivity and capacity factor (CF, in %) of PV plant 1.
# The divisor is named once instead of being repeated as a bare literal.
PV1_CAPACITY = 1.5  # presumably the installed capacity of plant 1 in kWp — TODO confirm

# productivity = daily production / capacity
df1['productivity'] = df1['prod_dia'] / PV1_CAPACITY
# CF = daily production as a percentage of 24 h at full capacity
df1['CF'] = (df1['prod_dia'] * 100) / (24 * PV1_CAPACITY)
df1

Unnamed: 0,Data,prod_dia,productivity,CF
0,2020-01-01,6.1,4.066667,16.944444
1,2020-01-02,3.1,2.066667,8.611111
2,2020-01-03,6.8,4.533333,18.888889
3,2020-01-04,6.3,4.200000,17.500000
4,2020-01-05,6.0,4.000000,16.666667
...,...,...,...,...
361,2020-12-27,6.2,4.133333,17.222222
362,2020-12-28,6.2,4.133333,17.222222
363,2020-12-29,6.3,4.200000,17.500000
364,2020-12-30,5.6,3.733333,15.555556


In [9]:
# Add the daily productivity and capacity factor (CF, in %) of PV plant 2.
# The divisor is named once instead of being repeated as a bare literal.
PV2_CAPACITY = 3.9  # presumably the installed capacity of plant 2 in kWp — TODO confirm

# productivity = daily production / capacity
df2['productivity'] = df2['prod_dia'] / PV2_CAPACITY
# CF = daily production as a percentage of 24 h at full capacity
df2['CF'] = (df2['prod_dia'] * 100) / (24 * PV2_CAPACITY)
df2

Unnamed: 0,Data,prod_dia,productivity,CF
0,2020-01-01,15.8,4.051282,16.880342
1,2020-01-02,7.4,1.897436,7.905983
2,2020-01-03,19.1,4.897436,20.405983
3,2020-01-04,17.1,4.384615,18.269231
4,2020-01-05,16.2,4.153846,17.307692
...,...,...,...,...
361,2020-12-27,18.0,4.615385,19.230769
362,2020-12-28,18.2,4.666667,19.444444
363,2020-12-29,17.1,4.384615,18.269231
364,2020-12-30,14.3,3.666667,15.277778


In [10]:
# Load the irradiance table (needed to compute the PR) and parse its 'Data'
# column from day-month-year text to datetime.
df3 = pd.read_csv("irrad_2020.csv", sep=';').assign(
    Data=lambda raw: pd.to_datetime(raw['Data'], format='%d-%m-%Y')
)
df3

Unnamed: 0,Data,Hora (UTC),Irrad_horiz (KJ/m2),Irrad_5graus(KJ/m2),Irrad_10graus(KJ/m2)
0,2020-01-01,00:00:00,5.2,5.180232,5.121080
1,2020-01-01,01:00:00,9.5,9.463886,9.355820
2,2020-01-01,02:00:00,9.8,9.762746,9.651267
3,2020-01-01,03:00:00,10.0,9.961986,9.848231
4,2020-01-01,04:00:00,10.3,10.260845,10.143678
...,...,...,...,...,...
7941,2020-12-31,19:00:00,1639.0,1632.769428,1614.125083
7942,2020-12-31,20:00:00,955.5,951.867717,940.998485
7943,2020-12-31,21:00:00,151.1,150.525601,148.806772
7944,2020-12-31,22:00:00,0.0,0.000000,0.000000


In [11]:
# Convert the irradiance values from kJ/m2 to Wh/m2 (factor 0.277778 per kJ).
# NOTE(review): both plants are fed from 'Irrad_5graus(KJ/m2)'; the
# 'Irrad_10graus(KJ/m2)' column is not used in the visible cells — confirm this is intended for plant 2.
kj_to_wh = 0.277778
irrad_5graus_wh = df3['Irrad_5graus(KJ/m2)'] * kj_to_wh
df3['Irrad_pv1(Wh/m2)'] = irrad_5graus_wh
df3['Irrad_pv2(Wh/m2)'] = irrad_5graus_wh
df3

Unnamed: 0,Data,Hora (UTC),Irrad_horiz (KJ/m2),Irrad_5graus(KJ/m2),Irrad_10graus(KJ/m2),Irrad_pv1(Wh/m2),Irrad_pv2(Wh/m2)
0,2020-01-01,00:00:00,5.2,5.180232,5.121080,1.438955,1.438955
1,2020-01-01,01:00:00,9.5,9.463886,9.355820,2.628859,2.628859
2,2020-01-01,02:00:00,9.8,9.762746,9.651267,2.711876,2.711876
3,2020-01-01,03:00:00,10.0,9.961986,9.848231,2.767220,2.767220
4,2020-01-01,04:00:00,10.3,10.260845,10.143678,2.850237,2.850237
...,...,...,...,...,...,...,...
7941,2020-12-31,19:00:00,1639.0,1632.769428,1614.125083,453.547426,453.547426
7942,2020-12-31,20:00:00,955.5,951.867717,940.998485,264.407911,264.407911
7943,2020-12-31,21:00:00,151.1,150.525601,148.806772,41.812700,41.812700
7944,2020-12-31,22:00:00,0.0,0.000000,0.000000,0.000000,0.000000


In [12]:
# Daily available irradiance: sum the hourly values of each date, then divide by
# 1000 to go from Wh/m2 to kWh/m2. The results are Series indexed by 'Data'.
# Note: the Series keep their original names ('Irrad_pv1(Wh/m2)', 'Irrad_pv2(Wh/m2)')
# even though the values are in kWh/m2 after the division.
hourly_by_day = df3.groupby('Data')
irrad1 = hourly_by_day['Irrad_pv1(Wh/m2)'].sum().div(1000)
irrad2 = hourly_by_day['Irrad_pv2(Wh/m2)'].sum().div(1000)

In [13]:
# Attach the daily irradiance to each plant's table (default inner join on 'Data'),
# then order the rows by date. reset_index() adds an 'index' column to each frame.
df1 = df1.merge(irrad1, on='Data').reset_index().sort_values('Data')
df2 = df2.merge(irrad2, on='Data').reset_index().sort_values('Data')
df2

Unnamed: 0,index,Data,prod_dia,productivity,CF,Irrad_pv2(Wh/m2)
0,0,2020-01-01,15.8,4.051282,16.880342,4.138600
1,1,2020-01-02,7.4,1.897436,7.905983,1.845183
2,2,2020-01-03,19.1,4.897436,20.405983,5.243219
3,3,2020-01-04,17.1,4.384615,18.269231,4.832231
4,4,2020-01-05,16.2,4.153846,17.307692,4.697412
...,...,...,...,...,...,...
361,361,2020-12-27,18.0,4.615385,19.230769,5.077462
362,362,2020-12-28,18.2,4.666667,19.444444,5.393949
363,363,2020-12-29,17.1,4.384615,18.269231,5.430061
364,364,2020-12-30,14.3,3.666667,15.277778,5.277920


In [14]:
# Remove the 'index' column that reset_index() added after the merge.
# errors='ignore' keeps the cell re-runnable: `del df['index']` raises KeyError
# once the column is already gone.
df1 = df1.drop(columns='index', errors='ignore')
df2 = df2.drop(columns='index', errors='ignore')

In [15]:
# Add the performance ratio (PR, in %) to both plant tables:
#   PR = productivity * 100 / daily irradiance
# The irradiance columns are labelled '(Wh/m2)' but were divided by 1000 earlier,
# so they hold kWh/m2 here.
df1['PR'] = df1['productivity'].mul(100).div(df1['Irrad_pv1(Wh/m2)'])
df2['PR'] = df2['productivity'].mul(100).div(df2['Irrad_pv2(Wh/m2)'])
df2

Unnamed: 0,Data,prod_dia,productivity,CF,Irrad_pv2(Wh/m2),PR
0,2020-01-01,15.8,4.051282,16.880342,4.138600,97.890169
1,2020-01-02,7.4,1.897436,7.905983,1.845183,102.831878
2,2020-01-03,19.1,4.897436,20.405983,5.243219,93.405145
3,2020-01-04,17.1,4.384615,18.269231,4.832231,90.736875
4,2020-01-05,16.2,4.153846,17.307692,4.697412,88.428397
...,...,...,...,...,...,...
361,2020-12-27,18.0,4.615385,19.230769,5.077462,90.899441
362,2020-12-28,18.2,4.666667,19.444444,5.393949,86.516699
363,2020-12-29,17.1,4.384615,18.269231,5.430061,80.747070
364,2020-12-30,14.3,3.666667,15.277778,5.277920,69.471819


In [16]:
# Plant 1: monthly parameters — productivity summed over the days of each month,
# CF and PR averaged over the days of each month.
# month_name() returns English month names by default, so the keys always match
# `cats` below; strftime('%B') follows the system locale and could produce names
# that do not match, turning every month into NaN in the Categorical.
month_pv1 = df1['Data'].dt.month_name()
pv1a = df1.groupby(month_pv1)['productivity'].sum()
# Columns are selected with a list: tuple-style selection (['CF','PR']) is
# deprecated and rejected by current pandas.
pv1b = df1.groupby(month_pv1)[['CF', 'PR']].mean()

pv1 = pd.merge(pv1a, pv1b, on=['Data']).reset_index()

# An ordered categorical makes sort_values follow calendar order instead of alphabetical order.
cats = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
pv1['Data'] = pd.Categorical(pv1['Data'], categories=cats, ordered=True)

pv1 = pv1.sort_values(["Data"])
pv1

  pv1b = df1.groupby(df1['Data'].dt.strftime('%B'))['CF','PR'].mean()


Unnamed: 0,Data,productivity,CF,PR
4,January,110.6,14.865591,87.594381
3,February,99.933333,14.358238,91.555166
7,March,110.266667,14.820789,96.518831
0,April,98.933333,13.740741,93.67769
8,May,99.8,13.413978,83.710469
6,June,93.466667,12.981481,88.237402
5,July,112.333333,15.098566,87.608705
1,August,126.933333,17.060932,82.179484
11,September,167.133333,23.212963,104.642062
10,October,162.466667,21.836918,96.098097


In [17]:
# Plant 2: monthly parameters — productivity summed over the days of each month,
# CF and PR averaged over the days of each month.
# month_name() returns English month names by default, so the keys always match
# `cats` below; strftime('%B') follows the system locale and could produce names
# that do not match, turning every month into NaN in the Categorical.
month_pv2 = df2['Data'].dt.month_name()
pv2a = df2.groupby(month_pv2)['productivity'].sum()
# Columns are selected with a list: tuple-style selection (['CF','PR']) is
# deprecated and rejected by current pandas.
pv2b = df2.groupby(month_pv2)[['CF', 'PR']].mean()

pv2 = pd.merge(pv2a, pv2b, on=['Data']).reset_index()

# An ordered categorical makes sort_values follow calendar order instead of alphabetical order.
cats = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
pv2['Data'] = pd.Categorical(pv2['Data'], categories=cats, ordered=True)

pv2 = pv2.sort_values(["Data"])
pv2

  pv2b = df2.groupby(df2['Data'].dt.strftime('%B'))['CF','PR'].mean()


Unnamed: 0,Data,productivity,CF,PR
4,January,120.74359,16.228977,93.04546
3,February,98.230769,14.113616,88.686625
7,March,115.0,15.456989,99.846421
0,April,109.435897,15.19943,103.613404
8,May,128.871795,17.321478,107.581099
6,June,117.871795,16.371083,110.506346
5,July,141.589744,19.03088,109.468895
1,August,160.692308,21.598428,104.320827
11,September,154.564103,21.467236,97.02452
10,October,149.025641,20.030328,88.14987


In [20]:
# Write the per-day tables of both plants to CSV (default options, index included).
for daily_frame, daily_csv in ((df1, "perday_plant1.csv"), (df2, "perday_plant2.csv")):
    daily_frame.to_csv(daily_csv)

In [21]:
# Write the per-month tables of both plants to CSV (default options, index included).
for monthly_frame, monthly_csv in ((pv1, "permonth_plant1.csv"), (pv2, "permonth_plant2.csv")):
    monthly_frame.to_csv(monthly_csv)