In [1]:
import numpy as np
import pandas   as pd
import matplotlib.pyplot as plt
import networkx as nx
import os

### Creiamo un unico dataframe che contiene tutto
Labels(OK)  Degree(OK)	Betweenness(OK)	Voltage	Latitude(OK) Longitude(OK) Old_Labels(OK)

In [None]:
# Load the three per-node inputs: coordinates, degree/betweenness, and the
# old -> new label correspondence.
df_PowerGrid = pd.read_csv(
    './PowerGrid/node_coordinates.nodelist',
    delimiter=' ',
    names=["Label", "Latitude", "Longitude"],
    header=None,
)
degree = pd.read_csv(
    "./PowerGrid/gridkit_north_america.nodelist",
    delimiter=' ',
    names=["Label", "Degree", "node_betweenness"],
    header=0,  # the file's own header row is replaced by `names`
)
labels = pd.read_csv("./PowerGrid/old_new_labels.txt", header=None, sep=" ", names=["old_labels", "new_labels"])

# Column assignment aligns on the index. The frames above carry a default
# 0..N-1 RangeIndex, so copying their columns directly would only line up with
# df_Nodes (indexed by node label) if every label equalled its row position.
# Key each source on the node label so the alignment is explicit.
coords_by_label  = df_PowerGrid.set_index("Label")
degree_by_label  = degree.set_index("Label")
old_by_new_label = labels.set_index("new_labels")["old_labels"]

# One row per node, indexed by the (new) node label.
df_Nodes = pd.DataFrame(columns=["Degree", "Betweenness", "Voltage", "Latitude", "Longitude", "Old_Labels"],
                        index=df_PowerGrid["Label"])
df_Nodes["Latitude"]    = coords_by_label["Latitude"]
df_Nodes["Longitude"]   = coords_by_label["Longitude"]
df_Nodes["Degree"]      = degree_by_label["Degree"]
df_Nodes["Betweenness"] = degree_by_label["node_betweenness"]
df_Nodes["Old_Labels"]  = old_by_new_label

In [None]:
# Prepare the source table for the voltage column.
grid = pd.read_csv(
    './PowerGrid/gridkit_north_america-highvoltage-vertices_original.csv',
    delimiter=',',
    index_col="v_id",
)

# Keep only the vertices listed under their old label in `labels`. Per the
# original author's note, grid has 16174 rows while the node table has 16167,
# so the 7 vertices without a label are dropped here.
grid = grid.loc[labels["old_labels"]]

# Re-index the rows from the old labels to the new ones.
old_to_new = dict(zip(labels["old_labels"], labels["new_labels"]))
grid = grid.rename(index=old_to_new)

def avg_voltage(volt_list):
    """Return the mean voltage of each entry of ``volt_list`` as a list of floats.

    Some stations carry several power lines, in which case the entry is a
    ``";"``-separated string such as ``"220000;380000"``; the result for that
    entry is the arithmetic mean of the listed values.

    Parameters
    ----------
    volt_list : iterable
        Entries are converted with ``str`` before parsing, so numbers and
        strings are both accepted.

    Returns
    -------
    list of float
        One value per entry, in input order. An entry with no numeric token
        (e.g. an empty string) yields ``nan``; a ``nan`` entry stays ``nan``.

    Raises
    ------
    ValueError
        If a non-empty token cannot be parsed as a float.
    """
    volt = []
    for item in volt_list:
        # Skip empty tokens so a trailing or doubled separator ("100;" or
        # "100;;200") no longer aborts the whole conversion.
        values = [float(token) for token in str(item).split(";") if token.strip()]
        volt.append(sum(values) / len(values) if values else float("nan"))
    return volt

# Attach the per-station mean voltage and the frequency to the node table.
# avg_voltage returns a plain list, which pandas would assign purely by
# position. Wrapping it in a Series keyed by grid's index makes the assignment
# align on the node label, exactly like the Frequency column below, instead of
# relying on grid and df_Nodes sharing the same row order.
df_Nodes["Voltage"]   = pd.Series(avg_voltage(grid["voltage"]), index=grid.index)
df_Nodes["Frequency"] = grid["frequency"]

In [None]:
# Node type: collapse spelling variants of the station type into one value.
unique_list = list(set(grid["typ"]))     # distinct raw type values
my_dict = {'sub_station': 'substation'}  # raw spelling -> canonical spelling

# Values without an entry in my_dict are kept unchanged.
type_grid = [my_dict.get(item, item) for item in grid["typ"]]

# Assign through a Series keyed by grid's index so the values align on the node
# label rather than on row position.
df_Nodes["Type"] = pd.Series(type_grid, index=grid.index)

In [None]:
# Fix the column order of the final node table. reindex fills any column that
# is absent from df_Nodes with NaN instead of raising.
final_columns = ['Degree', 'Betweenness', 'Voltage', 'Type', 'Frequency', 'Latitude', 'Longitude', 'Old_Labels']
df_Nodes_Final = df_Nodes.reindex(columns=final_columns)

In [None]:
# Persist the assembled node table as a space-separated file (index included).
vertices_path = "./PowerGrid/vertices_grid.csv"
df_Nodes_Final.to_csv(vertices_path, sep=' ')


### Load network of US power grid

In [4]:
# Build the power-grid graph from its edge list, parsing node ids as integers.
edgelist_path = "./PowerGrid/gridkit_north_america.el"
G = nx.read_edgelist(edgelist_path, nodetype=int)

In [22]:
# Mean degree: sum of all node degrees divided by the number of nodes.
A = [deg for _node, deg in G.degree()]
avgD = sum(A) / len(A)
print(avgD)

2.504113317251191


### Compute 2-core 

In [23]:
# 2-core of G, as computed by networkx.
G2 = nx.k_core(G, 2)

In [24]:
# Number of nodes in the 2-core.
G2.number_of_nodes()

11404

### Check nodes in LCC (Largest connected component)

In [None]:
# Materialise the connected components of G (one set of nodes each) and print
# the size of every component.
C = list(nx.connected_components(G))
for size in map(len, C):
    print(size)

In [None]:
# Nodes in the largest connected component (LCC). The component is selected by
# size: a hard-coded position in C is only right for one particular graph and
# enumeration order.
Lcc = [int(x) for x in max(C, key=len)]

In [None]:
# Write the LCC node ids to a text file, one id per line.
with open('./PowerGrid/Lcc_nodelist.txt', 'w') as file:
    file.writelines(f"{number}\n" for number in Lcc)

In [None]:
# Add a 0/1 membership column to the full power-grid node table.
df_PowerGrid = pd.read_csv("./PowerGrid/vertices_grid.csv", sep=" ", index_col="Label")

# Mark the nodes listed in Lcc; every other row is NaN at this point.
df_PowerGrid.loc[Lcc, "2-core_lcc"] = 1

# Fill and cast by reassigning the column. Calling fillna(..., inplace=True) on
# df[col] is chained assignment: it is deprecated and leaves the frame
# untouched under pandas Copy-on-Write, which would make the int cast fail on
# the remaining NaNs.
df_PowerGrid["2-core_lcc"] = df_PowerGrid["2-core_lcc"].fillna(0).astype(int)
# NOTE(review): Lcc is derived from the components of G, not of the 2-core G2,
# although the column is named "2-core_lcc" - confirm which graph is intended.

In [None]:
# Save the node table, including the new membership column, space-separated.
vertices_lcc_path = "./PowerGrid/vertices_grid_2.csv"
df_PowerGrid.to_csv(vertices_lcc_path, sep=' ')

### Compute betweenness with networkx

In [None]:
# Betweenness centrality of every node. Original author's note: about 9 minutes
# for 16k nodes.
b = nx.betweenness_centrality(G)

# Same mapping, re-keyed by integer node id.
d = dict((int(node), score) for node, score in b.items())

In [None]:
# Degree of every node, keyed by integer node id. This definition used to be
# commented out, so the DataFrame below raised NameError on a fresh kernel.
dd = {int(node): deg for node, deg in G.degree()}

# One row per node with its degree and betweenness, sorted by node id.
nodes = pd.DataFrame({'node_degree': dd, 'node_betweenness': d}).sort_index()

In [None]:

# Save the degree/betweenness table as a space-separated node list.
# NOTE(review): this path is under ./Data/PowerGrid/ while the first cells use
# ./PowerGrid/ - confirm the working directory layout.
betweenness_path = './Data/PowerGrid/test_bet.nodelist'
nodes.to_csv(betweenness_path, sep=' ')

In [None]:
# Scatter plot of betweenness against degree, one point per node.
fig, ax = plt.subplots()
ax.scatter(nodes["node_degree"], nodes["node_betweenness"])
plt.show()

### Leggi edgelist originale e cambia i labels

In [None]:
# Read the original edge list. No nodetype is given, so node ids are not cast
# to int here. Note that this rebinds G.
original_edgelist_path = "./Data/PowerGrid/gridkit-north_america_original.el"
G = nx.read_edgelist(original_edgelist_path)

In [None]:
# Keep a handle on the original node labels, then build a copy of the graph
# whose nodes are renumbered 0..N-1. The default ordering is used, i.e. the
# order of G.nodes().
old_labels = G.nodes()
GG = nx.convert_node_labels_to_integers(G)

In [None]:
# Degree of every node of the relabelled graph, and the matching node ids as
# ints, both in GG.degree() order.
degree_sequence = [deg for _, deg in GG.degree()]
nodelist = [int(node) for node, _ in GG.degree()]

In [None]:
# Save the node list of the relabelled graph: one "<node> <degree>" pair per line.
with open('gridkit_north_america-highvoltage-vertices.nodelist', 'w') as f:
    f.writelines(f"{node} {deg}\n" for node, deg in zip(nodelist, degree_sequence))

In [None]:
# Save the node list of the original graph: one "<node> <degree>" pair per line.
degree_sequence_old = [deg for _, deg in G.degree()]
nodelist_old        = [int(node) for node, _ in G.degree()]
with open('gridkit_north_america-highvoltage-vertices_origin.nodelist', 'w') as f:
    f.writelines(f"{node} {deg}\n" for node, deg in zip(nodelist_old, degree_sequence_old))

In [None]:
# Save the edge list of the relabelled graph: one "<source> <target>" pair per line.
with open('gridkit_north_america-highvoltage-vertices.el', 'w') as f:
    f.writelines(f"{source} {target}\n" for source, target in GG.edges())

In [None]:
# Save the old-label -> new-label correspondence, one "<old> <new>" pair per
# line. The two node sequences are paired by position.
with open('old_new_labels.txt', 'w') as f:
    f.writelines(f"{old} {new}\n" for old, new in zip(old_labels, GG.nodes()))

### Estrai latitudine e longitudine dal file originale e salva i risultati usando i nuovi_label
Prendiamo la nodelist con i nuovi label; per ogni elemento troviamo il label corrispondente nel file **old_new_labels.txt**, lo cerchiamo nel dataset originale per estrarne latitudine e longitudine e, infine, salviamo tutto in una nuova nodelist.

In [None]:
# Input files for the coordinate extraction.
file_grid     = "./Data/PowerGrid/gridkit_north_america-highvoltage-vertices_original.csv"
file_nodelist = "./Data/PowerGrid/gridkit_north_america.nodelist"  # node list keyed by the new labels
file_label    = "./Data/PowerGrid/old_new_labels.txt"

# Original vertex table; column names come from the file's own header row.
orig_data = pd.read_csv(file_grid)

# Old -> new label correspondence, one space-separated pair per line.
labels = pd.read_csv(file_label, sep=" ", header=None, names=["old_labels", "new_labels"])

# NOTE(review): the first cell of the notebook reads a nodelist of the same
# name with three columns and a header row - confirm this two-column,
# header-less schema matches the file on disk.
nodeslist = pd.read_csv(file_nodelist, sep=" ", names=["new_labels", "degree"])

In [None]:
# Save a new node list with the new labels plus latitude and longitude.

# Map every v_id to the position of its first row, once. The previous version
# rebuilt list(orig_data["v_id"]) and searched it linearly for every label,
# which is quadratic in the number of nodes. setdefault keeps the first
# occurrence, matching list.index. A label that is missing from the original
# data now raises KeyError (previously ValueError).
first_row_of = {}
for position, v_id in enumerate(orig_data["v_id"]):
    first_row_of.setdefault(v_id, position)

with open('node_coordinates.nodelist', 'w') as f:
    for old_lab, new_lab in zip(labels["old_labels"], labels["new_labels"]):
        ind = first_row_of[old_lab]                     # row of this vertex in the original dataset
        lat, lon = orig_data.iloc[ind][["lat", "lon"]]  # its latitude and longitude
        f.write(str(new_lab) + " " + str(lat) + " " + str(lon) + "\n")

### Aggiungiamo una colonna con i nuovi label nel file

In [None]:
# Read the data.
file_grid     = "./Data/PowerGrid/gridkit_north_america-highvoltage-vertices_original.csv"
file_nodelist = "./Data/PowerGrid/gridkit_north_america.nodelist"  # holds the new labels
file_label    = "./Data/PowerGrid/old_new_labels.txt"

# Node list (new label, degree); `names` is given, so no header row is consumed.
nodeslist = pd.read_csv(
    file_nodelist,
    sep=" ",
    names=["new_labels", "degree"],
)
# Old -> new label correspondence.
labels = pd.read_csv(
    file_label,
    header=None,
    sep=" ",
    names=["old_labels", "new_labels"],
)
# Original vertex table, parsed with the read_csv defaults.
orig_data = pd.read_csv(file_grid)

In [None]:
# Lookup table: old label -> new label.
label_dict = dict(zip(labels.old_labels, labels.new_labels))

# New label (as int) for every row of orig_data, or NaN when the row's v_id has
# no entry in the lookup table.
relabeled_vector = []
for item in orig_data.v_id:
    if item in label_dict:
        relabeled_vector.append(int(label_dict[item]))
    else:
        relabeled_vector.append(np.nan)
# NOTE(review): relabeled_vector is not attached to orig_data in this cell -
# confirm whether it was meant to become a new column before the file is saved.

In [None]:
# Display the first rows of the original vertex table.
orig_data.head()

In [None]:
# Write the vertex table back to its source file in the format it is read in
# elsewhere in this notebook: comma-separated and without an extra index
# column. Writing it space-separated with the index would leave the file
# unparseable by the pd.read_csv(file_grid) calls that load this same path.
orig_data.to_csv("./Data/PowerGrid/gridkit_north_america-highvoltage-vertices_original.csv", index=False)

In [None]:
# Lookup table: old label -> new label.
label_dict = dict(zip(labels.old_labels, labels.new_labels))

# New labels of the first 1000 vertices, as a NumPy array. Series.map yields
# NaN for an id that has no entry in label_dict, whereas
# np.vectorize(label_dict.get) produced None for such ids and could not build a
# numeric output array.
relabeled_vector = orig_data.v_id[0:1000].map(label_dict).to_numpy()
relabeled_vector

### Storms

In [85]:
# Names of the storms to summarise.
STORM_LIST = [
    'INGRID', 'IRENE', 'EARL', 'KATE', 'SANDY', 'NATE', 'ISAAC',
    'PAULA', 'MATTHEW', 'JOAQUIN', 'BILL', 'KATIA', 'HERMINE', 'ALEX',
    'TOMAS', 'CRISTOBAL', 'IDA', 'KARL', 'ARTHUR', 'GONZALO', 'BERTHA',
]

# Pipe-delimited storm track table; the first column becomes the index.
filepath = "./US_storms_2009-2016.dat"
storm = pd.read_csv(filepath, sep="|", index_col=0)
storm.head(3)

Unnamed: 0_level_0,long,lat,Serial_Num.x,time,Season.x,Num.x,Basin.x,Sub_basin.x,Name.x,ISO_time.x,Nature.x,wmo_wind.x,wmo_pres.x
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
54593,-75.400002,30.6,2009147N31285,2009-05-26 18:00:00,2009,1,,,ONE,2009-05-26 18:00:00,DS,25,1010
54594,-75.599998,31.700001,2009147N31285,2009-05-27 00:00:00,2009,1,,,ONE,2009-05-27 00:00:00,DS,25,1010
54595,-75.5,32.700001,2009147N31285,2009-05-27 06:00:00,2009,1,,,ONE,2009-05-27 06:00:00,DS,25,1010


In [117]:
# Write one summary line per storm: name, maximum recorded wind, season.
# NOTE(review): rows are selected by name only. If a name occurs in more than
# one season, the maximum spans all of them and Season is taken from the first
# matching row - confirm this is intended.
with open("./test.csv", "w") as out_file:
    out_file.write("Name,MaxForceWind,Season\n")
    for storm_name in STORM_LIST:
        storm_rows = storm[storm["Name.x"] == storm_name]
        max_wind = storm_rows["wmo_wind.x"].max()
        season = storm_rows["Season.x"].iloc[0]  # raises IndexError if the name has no rows
        out_file.write(",".join([storm_name, str(max_wind), str(season)]) + "\n")

### Test

In [77]:
import numpy as np
# Draw n samples from the standard normal distribution (global NumPy RNG, no
# seed set here).
n = 100
x = np.random.standard_normal(n)

In [73]:
# Index of the running maximum: entry k is the argmax of the first k+1 samples.
# NOTE(review): the prefixes stop at length n-1, so the full array is never
# included - confirm whether that is intended.
rec = [x[:end].argmax() for end in range(1, n)]

In [79]:
# Empty one-dimensional float array.
c = np.empty(0)