# Import the datasets

In [21]:
import numpy as np
import pandas as pd

In [22]:
# Load the input tables. The CSV files are comma-separated, which is the
# pandas default, so no explicit separator is passed.
network = pd.read_csv('network.csv')
inspections = pd.read_csv('inspections.csv')
fulmerged = pd.read_csv('fulmerged.csv')
incidence = pd.read_excel('incidence.xlsx')
xgu = pd.read_csv('XGBOOSTULTIMATE.csv')



In [23]:
# Summarise the network table: columns, non-null counts, dtypes and memory usage.
network.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1446539 entries, 0 to 1446538
Data columns (total 12 columns):
 #   Column               Non-Null Count    Dtype  
---  ------               --------------    -----  
 0   PipeId               1446539 non-null  int64  
 1   Province             1446539 non-null  object 
 2   Town                 1446539 non-null  object 
 3   YearBuilt            1446539 non-null  int64  
 4   Material             1446539 non-null  object 
 5   GasType              1446539 non-null  object 
 6   Diameter             1446539 non-null  float64
 7   Length               1446539 non-null  float64
 8   Pressure             1446539 non-null  float64
 9   NumConnections       1446539 non-null  int64  
 10  NumConnectionsUnder  1446539 non-null  int64  
 11  BoolBridle           1446539 non-null  bool   
dtypes: bool(1), float64(3), int64(4), object(4)
memory usage: 122.8+ MB


In [24]:
# Summarise the incidence table: columns, non-null counts, dtypes and memory usage.
incidence.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2555 entries, 0 to 2554
Data columns (total 25 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   Unnamed: 0                        2555 non-null   int64  
 1   PipeId                            2555 non-null   int64  
 2   InspectionYear                    2555 non-null   int64  
 3   MonthsLastRev                     2555 non-null   int64  
 4   Severity                          2555 non-null   int64  
 5   Incidence                         2555 non-null   int64  
 6   Province                          2555 non-null   object 
 7   Town                              2555 non-null   object 
 8   Material                          2555 non-null   object 
 9   GasType                           2555 non-null   object 
 10  Diameter                          2555 non-null   float64
 11  Length                            2555 non-null   float64
 12  Pressu

In [25]:
# Summarise the inspections table: columns, dtypes and memory usage.
inspections.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6345344 entries, 0 to 6345343
Data columns (total 7 columns):
 #   Column          Dtype 
---  ------          ----- 
 0   PipeId          int64 
 1   MaintenanceId   object
 2   InspectionYear  int64 
 3   InspectionDate  object
 4   MonthsLastRev   int64 
 5   Severity        int64 
 6   Incidence       int64 
dtypes: int64(5), object(2)
memory usage: 338.9+ MB


In [26]:
# Inspection rows whose InspectionYear is greater than 2018 (i.e. 2019 onwards).
insp = inspections.loc[inspections['InspectionYear'].gt(2018)]

In [27]:
# Summarise the xgu table (PipeId plus a float Incidence column per the output below).
xgu.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 909733 entries, 0 to 909732
Data columns (total 2 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   PipeId     909733 non-null  int64  
 1   Incidence  909733 non-null  float64
dtypes: float64(1), int64(1)
memory usage: 13.9 MB


In [28]:
# Summarise the post-2018 inspection subset built above.
insp.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1352907 entries, 4977815 to 6345343
Data columns (total 7 columns):
 #   Column          Non-Null Count    Dtype 
---  ------          --------------    ----- 
 0   PipeId          1352907 non-null  int64 
 1   MaintenanceId   1352907 non-null  object
 2   InspectionYear  1352907 non-null  int64 
 3   InspectionDate  1352907 non-null  object
 4   MonthsLastRev   1352907 non-null  int64 
 5   Severity        1352907 non-null  int64 
 6   Incidence       1352907 non-null  int64 
dtypes: int64(5), object(2)
memory usage: 82.6+ MB


In [29]:
# NOTE(review): this binds a second name to the same DataFrame object; no copy
# is made, so in-place changes to `network` are also visible through `net`.
# Use `network.copy()` if an independent snapshot was intended.
net = network

In [30]:
# Second name for the frame currently bound to `incidence` (no copy is made).
# When `incidence` is later rebound to a filtered frame, `inttt` keeps
# referring to this original, unfiltered object.
inttt = incidence

# Creating new parameters 

In [31]:
# Derive the `volumetesttest` column from each pipe's Diameter and Pressure.
# NOTE(review): `Pressure / 8.314 * 298.15` evaluates left to right, i.e.
# (Pressure / 8.314) * 298.15 — confirm this is the intended formula.
network.loc[:, 'volumetesttest'] = (
    (3.14 / 4)
    * np.sqrt(network['Diameter'] / 10)
    * (network['Pressure'] / 8.314 * 298.15)
    * 0.8
)

### Selecting the data 

In [32]:
# Keep only rows flagged Incidence == 1 whose InspectionYear is after 2018.
# `.copy()` makes the result an independent frame, so the columns added to
# `incidence` in later cells are written to it directly instead of to a slice
# of the original (which triggers pandas' SettingWithCopyWarning).
incidence = incidence[
    (incidence['Incidence'] == 1) & (incidence['InspectionYear'] > 2018)
].copy()

### New parameters in the new dataset

In [33]:
# Scale `volumetesttest` by 1/100 into `spil_to_cubicmeters`.
# NOTE(review): this was described as a cm^3 -> m^3 conversion, which would be
# a factor of 1e6 rather than 100 — confirm the intended units.
incidence['spil_to_cubicmeters'] = incidence['volumetesttest'] / 100

# Convert the scaled volume to energy (kWh per the column name) with a divisor of 2.244.
incidence['spil_to_kwh'] = incidence['spil_to_cubicmeters'] / 2.244

# Severity divided by 100; used as a multiplier for both cost and footprint.
severity_fraction = incidence['Severity'] / 100

# Gas prices in euros per kilowatt-hour
gas_natural_price_per_kwh = 0.05
gas_propane_price_per_kwh = 0.11

# Spill cost in euros per second. Rows with any other GasType keep 0.0.
incidence['spil_price'] = 0.0
for gas_label, price_per_kwh in (
    ('Gas natural', gas_natural_price_per_kwh),
    ('Gas propano', gas_propane_price_per_kwh),
):
    gas_rows = incidence['GasType'] == gas_label
    incidence.loc[gas_rows, 'spil_price'] = (
        incidence.loc[gas_rows, 'spil_to_kwh'] * price_per_kwh * severity_fraction
    )

# Total spill cost in euros per second
print(f"Total spill cost: {incidence['spil_price'].sum()} euros per second")

# Carbon footprint in kilograms of CO2 per kilowatt-hour
gas_natural_carbon_footprint_per_kwh = 0.185
gas_propane_carbon_footprint_per_kwh = 0.138

# Spill carbon footprint in kilograms of CO2 per second. Other gas types keep 0.0.
incidence['spil_carbon_footprint'] = 0.0
for gas_label, footprint_per_kwh in (
    ('Gas natural', gas_natural_carbon_footprint_per_kwh),
    ('Gas propano', gas_propane_carbon_footprint_per_kwh),
):
    gas_rows = incidence['GasType'] == gas_label
    incidence.loc[gas_rows, 'spil_carbon_footprint'] = (
        incidence.loc[gas_rows, 'spil_to_kwh'] * footprint_per_kwh * severity_fraction
    )

# Total spill carbon footprint in kilograms of CO2 per second
print(f"Total spill carbon footprint: {incidence['spil_carbon_footprint'].sum()} kg per second")

# Seconds per month: 2419200 = 28 days * 24 h * 3600 s (a 28-day month).
seconds_per_month = 2419200

# Per-pipeline totals: the per-second rate times MonthsLastRev times half a
# month of seconds.
# NOTE(review): the division by 2 presumably assumes the leak started midway
# through the interval since the last revision — confirm.
half_interval_seconds = incidence['MonthsLastRev'] * (seconds_per_month / 2)
incidence['total_spil_price'] = incidence['spil_price'] * half_interval_seconds
incidence['total_spil_carbon_footprint'] = incidence['spil_carbon_footprint'] * half_interval_seconds

# Totals over all pipelines
print(f"Total spill cost: {incidence['total_spil_price'].sum()} euros")
print(f"Total spill carbon footprint: {incidence['total_spil_carbon_footprint'].sum()} kg")

Total spill cost: 2.051522180826397 euros per second
Total spill carbon footprint: 5.234185670604507 kg per second
Total spill cost: 53461411.038721636 euros
Total spill carbon footprint: 138476901.23885608 kg


In [22]:
# Print the summed `total_spil_carbon_footprint` of each gas type, divided by 17520.
# NOTE(review): 17520 equals 2 * 365 * 24 (the hours in two years) — presumably
# a per-hour normalisation; confirm, since the printed unit is still "kg".
print("Total spill carbon footprint by gas type:")
grouped_data = incidence.groupby('GasType')
for gas_type, footprint_values in grouped_data['total_spil_carbon_footprint']:
    total_carbon_footprint = footprint_values.sum() / 17520
    print(f"{gas_type}: {total_carbon_footprint} kg")
    

Total spill carbon footprint by gas type:
Gas natural: 6166.654429529961 kg
Gas propano: 1737.278289582829 kg


In [None]:
# NOTE(review): the original line ended in `.()`, a syntax error that named no
# aggregation. `.size()` (number of rows per gas type) is used as a placeholder
# so the cell runs; replace it with the aggregation that was actually intended.
inc = incidence.groupby('GasType').size()

In [35]:
# Mean Severity over the rows currently held in `incidence`.
incs = incidence.loc[:, 'Severity'].mean()

In [36]:
# Display the mean severity computed in the previous cell.
incs

2.6853228962818005

In [9]:
# Total spill cost per gas type. The grouping column is 'GasType'; the
# lower-case 'gastype' key used before raises KeyError.
print(f"Total spill cost by gas type:\n{incidence.groupby('GasType')['total_spil_price'].sum()}")

# The same per-gas totals divided by 17520.
# NOTE(review): 17520 equals 2 * 365 * 24 (the hours in two years) — presumably
# a per-hour normalisation; confirm, since the printed unit is still "euros".
print("Total cost by gas type:")
grouped_data = incidence.groupby('GasType')
for group_name, group_data in grouped_data:
    total_spil_price = group_data['total_spil_price'].sum() / 17520
    # Fixed the `ttotal_spil_price` typo, which raised NameError.
    print(f"{group_name}: {total_spil_price} euros")

Total spill cost by gas type:
GasType
Gas natural    2.919994e+07
Gas propano    2.426147e+07
Name: total_spil_price, dtype: float64


In [15]:
# Print the summed `total_spil_price` of each gas type, divided by 17520.
# NOTE(review): 17520 equals 2 * 365 * 24 (the hours in two years) — presumably
# a per-hour normalisation; confirm, since the printed unit is still "euros".
print("Total cost by gas type:")
grouped_data = incidence.groupby('GasType')
for gas_type, price_values in grouped_data['total_spil_price']:
    total_spil_price = price_values.sum() / 17520
    print(f"{gas_type}: {total_spil_price} euros")

Total cost by gas type:
Gas natural: 1666.6633593324223 euros
Gas propano: 1384.7870424210957 euros


# Strategy parameters

In [6]:
# Inspection columns that are removed before the merge.
# NOTE(review): 'Incidence' is presumably dropped because `fulmerged` already
# carries its own Incidence column (later cells filter on merged['Incidence']) — confirm.
delete = [
    'MaintenanceId',
    'InspectionDate',
    'Incidence'
]
# errors='ignore' keeps this cell re-runnable: on a second execution the
# columns are already gone and a plain drop would raise KeyError.
inspections = inspections.drop(columns=delete, errors='ignore')

# Join `fulmerged` with the trimmed inspections on PipeId (inner join, the pandas default).
merged = pd.merge(fulmerged, inspections, on="PipeId")

In [7]:
# Derive the `volumetesttest` column for the merged table from Diameter and Pressure.
# NOTE(review): `Pressure / 8.314 * 298.15` evaluates left to right, i.e.
# (Pressure / 8.314) * 298.15 — confirm this is the intended formula.
merged.loc[:, 'volumetesttest'] = (
    (3.14 / 4)
    * np.sqrt(merged['Diameter'] / 10)
    * (merged['Pressure'] / 8.314 * 298.15)
    * 0.8
)

In [14]:
# Keep only rows flagged Incidence == 1 whose InspectionYear is after 2018.
# `.copy()` makes the result an independent frame, so the columns added to
# `merged` in later cells are written to it directly instead of to a slice of
# the full merge (which triggers pandas' SettingWithCopyWarning).
merged = merged[
    (merged['Incidence'] == 1) & (merged['InspectionYear'] > 2018)
].copy()

In [17]:
# Non-null count of every column, grouped by the Incidence value.
mer = merged.groupby(by='Incidence').count()

In [18]:
# Display the per-Incidence column counts computed in the previous cell.
mer

Unnamed: 0_level_0,PipeId,Province,Town,YearBuilt,Material,GasType,Diameter,Length,Pressure,NumConnections,...,InspectionYear,MonthsLastRev,Severity,volumetesttest,spil_to_cubicmeters,spil_to_kwh,spil_price,spil_carbon_footprint,total_spil_price,total_spil_carbon_footprint
Incidence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,186784,186784,186784,186784,186784,186784,186784,186784,186784,186784,...,186784,186784,186784,186784,186784,186784,186784,186784,186784,186784


In [20]:
# Number of non-null PipeId values in `merged` (not used further in this cell).
count = merged['PipeId'].count()

# Scale `volumetesttest` by 1/100 into `spil_to_cubicmeters`.
# NOTE(review): this was described as a cm^3 -> m^3 conversion, which would be
# a factor of 1e6 rather than 100 — confirm the intended units.
merged['spil_to_cubicmeters'] = merged['volumetesttest'] / 100

# Convert the scaled volume to energy (kWh per the column name) with a divisor of 2.244.
merged['spil_to_kwh'] = merged['spil_to_cubicmeters'] / 2.244

# Severity divided by 100; used as a multiplier for both cost and footprint.
severity_fraction = merged['Severity'] / 100

# Gas prices in euros per kilowatt-hour
gas_natural_price_per_kwh = 0.05
gas_propane_price_per_kwh = 0.11

# Spill cost in euros per second. Rows with any other GasType keep 0.0.
merged['spil_price'] = 0.0
for gas_label, price_per_kwh in (
    ('Gas natural', gas_natural_price_per_kwh),
    ('Gas propano', gas_propane_price_per_kwh),
):
    gas_rows = merged['GasType'] == gas_label
    merged.loc[gas_rows, 'spil_price'] = (
        merged.loc[gas_rows, 'spil_to_kwh'] * price_per_kwh * severity_fraction
    )

# Total spill cost in euros per second
print(f"Total spill cost: {merged['spil_price'].sum()} euros per second")

# Carbon footprint in kilograms of CO2 per kilowatt-hour
gas_natural_carbon_footprint_per_kwh = 0.185
gas_propane_carbon_footprint_per_kwh = 0.138

# Spill carbon footprint in kilograms of CO2 per second. Other gas types keep 0.0.
merged['spil_carbon_footprint'] = 0.0
for gas_label, footprint_per_kwh in (
    ('Gas natural', gas_natural_carbon_footprint_per_kwh),
    ('Gas propano', gas_propane_carbon_footprint_per_kwh),
):
    gas_rows = merged['GasType'] == gas_label
    merged.loc[gas_rows, 'spil_carbon_footprint'] = (
        merged.loc[gas_rows, 'spil_to_kwh'] * footprint_per_kwh * severity_fraction
    )

# Total spill carbon footprint in kilograms of CO2 per second
print(f"Total spill carbon footprint: {merged['spil_carbon_footprint'].sum()} kg per second")

# Seconds per month: 2419200 = 28 days * 24 h * 3600 s (a 28-day month).
seconds_per_month = 2419200

# Per-pipeline totals: the per-second rate times MonthsLastRev times half a
# month of seconds.
# NOTE(review): the division by 2 presumably assumes the leak started midway
# through the interval since the last revision — confirm.
half_interval_seconds = merged['MonthsLastRev'] * (seconds_per_month / 2)
merged['total_spil_price'] = merged['spil_price'] * half_interval_seconds
merged['total_spil_carbon_footprint'] = merged['spil_carbon_footprint'] * half_interval_seconds

# Totals over all pipelines
print(f"Total spill cost: {merged['total_spil_price'].sum()} euros")
print(f"Total spill carbon footprint: {merged['total_spil_carbon_footprint'].sum()} kg")

Total spill cost: 204.86998179136685 euros per second
Total spill carbon footprint: 621.3065575063988 kg per second
Total spill cost: 5676670002.580232 euros
Total spill carbon footprint: 17218525497.53696 kg


In [10]:
# Multiply the constant by the number of seconds in a 365-day year
# (60 s * 60 min * 24 h * 365 d). The stray trailing `*`, which made the
# original expression a syntax error, is removed.
# NOTE(review): presumably a per-second rate scaled to one year; the origin of
# the constant is not shown in this notebook — confirm.
yearly_value = 0.0010968283246496854 * 60 * 60 * 24 * 365
yearly_value

34589.57804615248

In [129]:
# Show 50 randomly chosen rows of `merged`. No random_state is passed, so a
# different set of rows is drawn on every run.
merged.sample(50)

Unnamed: 0,PipeId,Incidence,Province,Town,YearBuilt,Material,GasType,Diameter,Length,Pressure,...,InspectionYear,MonthsLastRev,Severity,volumetesttest,spil_to_cubicmeters,spil_to_kwh,spil_price,spil_carbon_footprint,total_spil_price,total_spil_carbon_footprint
2173944,189507821,1,Castellón,Castellon de la Plana/Cas,1950,PE,Gas natural,90.0,17.168,4.0,...,2019,24,4,270.249988,2.7025,1.204323,0.002409,0.008912,69923.932717,258718.551052
1486023,30169555,1,Toledo,Carranque,2017,PE,Gas propano,20.0,18.557,1.7,...,2019,22,4,54.143753,0.541438,0.241282,0.001062,0.001332,28251.573373,35442.882958
3414311,52358987,1,Girona,Girona,1978,PN,Gas natural,110.0,8.4,4.0,...,2020,24,4,298.772603,2.987726,1.331429,0.002663,0.009853,77303.816229,286024.120048
3047798,228311988,1,Barcelona,Manresa,1990,PE,Gas natural,160.0,125.942,0.025,...,2020,24,4,2.252083,0.022521,0.010036,2e-05,7.4e-05,582.699439,2155.987925
3162717,45920334,1,Toledo,Talavera de la Reina,1950,PE,Gas natural,63.0,20.773,4.0,...,2020,24,4,226.107362,2.261074,1.007609,0.002015,0.007456,58502.559402,216459.469788
263384,416311666,1,Barcelona,Cornella de Llobregat,1986,PE,Gas natural,63.0,143.183,4.0,...,2019,24,4,226.107362,2.261074,1.007609,0.002015,0.007456,58502.559402,216459.469788
2484160,134014225,1,Palencia,Palencia,1992,PE,Gas natural,160.0,83.943,4.0,...,2019,24,4,360.333317,3.603333,1.605763,0.003212,0.011883,93231.910289,344958.06807
2389254,191043151,1,Barcelona,Barcelona,1973,FD,Gas natural,150.0,22.92,0.025,...,2019,24,4,2.18057,0.021806,0.009717,1.9e-05,7.2e-05,564.196306,2087.526332
2842225,45853012,1,Albacete,Albacete,1993,PE,Gas natural,90.0,61.669,4.0,...,2019,23,4,270.249988,2.7025,1.204323,0.002409,0.008912,67010.43552,247938.611425
2916555,191029508,1,Barcelona,Castellar Del Valles,1987,PE,Gas natural,110.0,22.549,0.1,...,2019,24,4,7.469315,0.074693,0.033286,6.7e-05,0.000246,1932.595406,7150.603001


In [125]:
# Non-null count of every column in `incidence`.
incidence.count()

Unnamed: 0                          2555
PipeId                              2555
InspectionYear                      2555
MonthsLastRev                       2555
Severity                            2555
Incidence                           2555
Province                            2555
Town                                2555
Material                            2555
GasType                             2555
Diameter                            2555
Length                              2555
Pressure                            2555
NumConnections                      2555
NumConnectionsUnder                 2555
BoolBridle                          2555
volumetesttest                      2555
spil percentage                     2555
spil_to_cubicmeters                 2555
spil_to_kwh                         2555
spil_price                          2555
spil_carbon_footprint               2555
total_spil_price                    2555
total_spil_carbon_footprint         2555
total price mult

In [16]:
# Re-read the same file that was loaded into `xgu` at the top of the notebook,
# relying on the default ',' separator.
xgu = pd.read_csv('XGBOOSTULTIMATE.csv')

Unnamed: 0_level_0,PipeId
Incidence,Unnamed: 1_level_1
0.023512,2
0.023899,1
0.023955,1
0.024137,3
0.024315,2
...,...
0.978271,1
0.978641,1
0.978886,1
0.978915,1


In [37]:
# Display `xgu` with its index moved into an `index` column. The result is not
# assigned, so `xgu` itself is left unchanged.
xgu.reset_index()

Unnamed: 0,index,PipeId,Incidence
0,0,446859944,0.069545
1,1,428124500,0.239829
2,2,438428871,0.059600
3,3,429034569,0.377791
4,4,411184477,0.309308
...,...,...,...
909728,909728,235426649,0.117816
909729,909729,235426673,0.210248
909730,909730,235426707,0.175256
909731,909731,190956601,0.180414


In [38]:
# Rows of `xgu` whose Incidence value is strictly above 0.5.
xgu_n = xgu.loc[xgu['Incidence'].gt(0.5)]

In [39]:
# Display the rows selected in the previous cell (Incidence above 0.5).
xgu_n

Unnamed: 0,PipeId,Incidence
17,448689302,0.525198
74,184053681,0.645414
233,56904825,0.546613
234,56905395,0.625326
430,51233505,0.552729
...,...,...
909704,190956806,0.549436
909705,190956820,0.772090
909721,190957150,0.708402
909722,190956594,0.669384
