In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import tqdm

$P_{installed}=Number of Panels×P_{WP} = \frac{A_{installed}}{A_{panel}}×P_{WP}$ 

and this can be validated by $P_{installed} = \frac{total \ output}{P_{Wp}*\eta_{total}}$ 

from the paper: $P_{WP} = 285WP$

$A_{panel} = 1.6m^2$

$A_{installed}\approx 150.588 km^2$

so $P_{installed}=Number of Panels×P_{WP} = \frac{A_{installed}}{A_{panel}}×P_{WP}=\frac{150588000}{1.6}*285WP = 26.823 GWP $

calculated specific PV power output: 919 kWh/kWP

In [6]:
PV_generation = pd.read_csv('PV_data/rooftop_PV_CH_EPV_W_by_building.csv')
irradiance = pd.read_csv('PV_data/rooftop_PV_CH_Gt_W_m2_by_building.csv')

In [2]:
pv_processed = pd.read_csv('PV_data/PV_Building_processed_3000.csv')

In [10]:
unallocated = pv_processed[pv_processed['MV_grid'] == '-1']
total = len(pv_processed)
allocated_rate = 1-len(unallocated)/total
print('Allocated rate:',allocated_rate)

Allocated rate: 0.9748652123614917


In [7]:
PV_generation_slice = PV_generation[['2001-06-15 12:00:00', '2001-07-15 12:00:00','2001-08-15 12:00:00']]
irradiance_slice = irradiance[['2001-06-15 12:00:00', '2001-07-15 12:00:00','2001-08-15 12:00:00']]
PV_generation_slice.columns = ['June midday', 'July midday', 'August midday']
irradiance_slice.columns = ['June midday', 'July midday', 'August midday']
PV_generation_slice_total = PV_generation_slice.sum(axis=0)
irradiance_slice_total = irradiance_slice.sum(axis=0)

In [3]:
area = PV_generation.iloc[:,1:]/(irradiance.iloc[:,1:]*0.138)
#area = area.dropna(axis=1)

In [4]:
# fill 0 with average value of the row
area = area.replace(0, np.nan)
area = area.fillna(area.mean(axis=1))

In [5]:
area['mean'] = area.mean(axis=1)

In [6]:
area.head()

Unnamed: 0,2001-01-15 08:00:00,2001-01-15 09:00:00,2001-01-15 10:00:00,2001-01-15 11:00:00,2001-01-15 12:00:00,2001-01-15 13:00:00,2001-01-15 14:00:00,2001-01-15 15:00:00,2001-01-15 16:00:00,2001-02-15 07:00:00,...,2001-11-15 15:00:00,2001-12-15 08:00:00,2001-12-15 09:00:00,2001-12-15 10:00:00,2001-12-15 11:00:00,2001-12-15 12:00:00,2001-12-15 13:00:00,2001-12-15 14:00:00,2001-12-15 15:00:00,mean
0,55.06412,55.877332,55.977957,55.996616,55.996613,55.869383,55.834254,54.328829,,52.10288,...,53.475167,54.992231,55.834254,55.899286,55.988713,55.918581,55.719895,55.386358,50.594781,50.61117
1,48.167744,47.903163,47.172773,46.704598,46.677764,47.130788,47.968714,48.085255,,46.397346,...,46.759075,48.18083,47.921506,47.255737,46.863952,46.933802,47.503088,48.188696,47.480024,43.572066
2,31.001487,31.05244,30.497183,30.230699,30.326799,30.710015,31.160813,30.686079,,29.484925,...,29.109019,30.735086,31.02089,30.603454,30.412674,30.548229,30.892741,31.021351,28.636895,27.756941
3,48.851841,49.887177,49.455288,48.995479,48.860923,49.125761,49.774529,49.251962,,42.046901,...,45.577047,48.45422,49.890226,49.529194,49.189059,49.189061,49.489382,49.888587,45.978107,44.67465
4,38.891312,38.910675,38.912032,38.918345,38.891304,38.361943,35.163611,29.963875,,38.636366,...,27.532918,38.852913,38.921331,38.921452,38.911538,38.737008,37.58082,33.756816,22.272074,34.811122


In [10]:
total_area = int(area['mean']).sum()

TypeError: cannot convert the series to <class 'int'>

In [8]:
total_area/10**6 # in km^2

149.1764403619222

In [9]:
P_installed = 285*total_area/(1.6*10**9) # GWp
P_installed

26.572053439467393

In [45]:
grid_names = os.listdir('PV_allocation_results/3000')
# remove P_total.csv
grid_names.remove('P_total.csv')
installed_sum = 0
total_installed = pd.DataFrame(columns = ['grid_name', 'MV_osmid', 'P_installed (kWp)'])
for name in grid_names:
    P_installed = pd.read_csv(f'PV_allocation_results/3000/{name}/{name}_P_installed.csv')
    P_installed['grid_name'] = name
    total_installed = pd.concat([total_installed, P_installed])
    installed_sum += P_installed['P_installed (kWp)'].sum()
print('total installed capacity:', installed_sum/10**6, 'GWp')

  total_installed = pd.concat([total_installed, P_installed])


total installed capacity: 0.599030018848627 GWp


In [12]:
buffer_distance = '3000'
PV_building = pd.read_csv('PV_data/PV_Building_processed_'+buffer_distance+'.csv')
PV_building['MV_osmid'] = PV_building['MV_osmid'].astype(int)

In [15]:
PV_building = PV_building[PV_building['MV_grid']!='-1']

In [16]:
def mapping(PV_building, df):
    mv_grid_dict = dict(zip(PV_building['SB_UUID'], PV_building['MV_grid']))
    mv_osmid_dict = dict(zip(PV_building['SB_UUID'], PV_building['MV_osmid'])) 
    df['MV_grid'] = df['SB_UUID'].map(mv_grid_dict)
    df['MV_osmid'] = df['SB_UUID'].map(mv_osmid_dict)
    df = df[df['MV_grid'].notna()] 
    return df

In [17]:
irradiance_mapped = mapping(PV_building, irradiance)
PV_generation_mapped = mapping(PV_building, PV_generation)

In [8]:
def area_calculation(PV_generation_data, irradiance_data):
    P_data =pd.DataFrame(columns=['grid_id','MV_osmid', 'P_installed (kWp)'])
    # delete the columns with all zeros
    #PV_generation_data = PV_generation_data.loc[:, (PV_generation_data != 0).any(axis=0)]
    #irradiance_data = irradiance_data.loc[:, (irradiance_data != 0).any(axis=0)]
    area = PV_generation_data.iloc[:,1:-2]/(irradiance_data.iloc[:,1:-2]*0.138)
    area = area.replace(0, np.nan)
    area = area.fillna(area.mean(axis=1))
    area['mean'] = area.mean(axis=1)
    P_installed = 285*area['mean']/(1.6*10**3) #kWp
    P_data['grid_id'] = PV_generation_data['MV_grid']
    P_data['MV_osmid'] = PV_generation_data['MV_osmid']
    P_data['P_installed (kWp)'] = P_installed
    return P_data

In [9]:
p_test= area_calculation(PV_generation_mapped, irradiance_mapped)

In [10]:
len(p_test)

2223844

In [11]:
#p_test
# add the P_installed(kWp) together if they have the same MV_osmid and grid_id
p_test_sum = p_test.groupby(['grid_id', 'MV_osmid']).sum().reset_index()

In [25]:
total = pd.read_csv('PV_allocation_results/3000/3000_installed_capacity.csv')
total['P_installed (kWp)'].sum()/10**6

26.06498625808094

In [5]:
demand_pre = pd.read_pickle('PV_data/0_0_demand.pkl')
demand_pre.iloc[:,0:15].head()

Unnamed: 0,MV_osmid,2001-01-15 00:00:00,2001-01-15 01:00:00,2001-01-15 02:00:00,2001-01-15 03:00:00,2001-01-15 04:00:00,2001-01-15 05:00:00,2001-01-15 06:00:00,2001-01-15 07:00:00,2001-01-15 08:00:00,2001-01-15 09:00:00,2001-01-15 10:00:00,2001-01-15 11:00:00,2001-01-15 12:00:00,2001-01-15 13:00:00
0,0,0,0,0,0,0,0,0,0,291955.030262,562711.969984,790032.753651,930106.08425,933252.57749,783994.535356
1,3,0,0,0,0,0,0,0,0,52318.331361,107572.983096,158209.11312,188301.38003,186935.26392,157324.51025
2,6,0,0,0,0,0,0,0,0,9451.43688,19070.1647,25285.62302,28944.88629,28290.16265,23174.49238
3,7,0,0,0,0,0,0,0,0,39431.929214,71766.54452,96816.7445,111558.94395,110037.46654,90351.57691
4,9,0,0,0,0,0,0,0,0,7429.46281,13651.003114,25663.22587,34708.58133,36016.61025,29874.32679


In [30]:
demand = pd.read_pickle('PV_allocation_results/3000/0_0/0_0_demand.pkl')
demand['max_demand'] = demand.max(axis=1)/10**3
demand['sum_demand (Gwh)'] = demand.sum(axis=1)*30.4167/10**9
demand.head()

Unnamed: 0,MV_osmid,2001-01-15 00:00:00,2001-01-15 01:00:00,2001-01-15 02:00:00,2001-01-15 03:00:00,2001-01-15 04:00:00,2001-01-15 05:00:00,2001-01-15 06:00:00,2001-01-15 07:00:00,2001-01-15 08:00:00,...,2001-12-15 16:00:00,2001-12-15 17:00:00,2001-12-15 18:00:00,2001-12-15 19:00:00,2001-12-15 20:00:00,2001-12-15 21:00:00,2001-12-15 22:00:00,2001-12-15 23:00:00,max_demand,sum_demand (Gwh)
0,0.0,0,0,0,0,0,0,0,0,291955.030262,...,0,0,0,0,0,0,0,0,2448.67415,4.745344
1,3.0,0,0,0,0,0,0,0,0,52318.331361,...,0,0,0,0,0,0,0,0,488.908645,0.953698
2,6.0,0,0,0,0,0,0,0,0,9451.43688,...,0,0,0,0,0,0,0,0,77.736568,0.147707
3,7.0,0,0,0,0,0,0,0,0,39431.929214,...,0,0,0,0,0,0,0,0,336.263311,0.638817
4,9.0,0,0,0,0,0,0,0,0,7429.46281,...,0,0,0,0,0,0,0,0,98.406757,0.184281


In [12]:
p_test_sum

Unnamed: 0,grid_id,MV_osmid,P_installed (kWp)
0,0_0,0.0,5507.940352
1,0_0,3.0,1112.038449
2,0_0,6.0,169.807131
3,0_0,7.0,742.162953
4,0_0,9.0,215.069930
...,...,...,...
29496,9_2,150.0,627.306094
29497,9_2,152.0,740.164592
29498,9_2,153.0,1539.915761
29499,9_2,154.0,169.820396


In [120]:
p_test_sum.to_csv('PV_allocation_results/3000/3000_installed_capacity.csv', index=False)

In [None]:
df_time_series = mapping(PV_building, PV_time_series)
df_time_series_area = mapping(PV_building, PV_time_series_area)

In [None]:
MV_grids_ids = PV_building['MV_grid'].unique()
MV_grids_ids = MV_grids_ids[MV_grids_ids != '-1']
# rearrange the order of MV grid ids to start with the smallest id
MV_grids_ids = np.sort(MV_grids_ids)
P_total = pd.DataFrame(columns=['grid_id','MV_osmid', 'P_installed (kWp)'])