# Projeto de classificação

## Componentes do grupo
- $10819033$ - Alberto Campos Neves
- $11345555$ - Breno Alves de Sousa
- $11215819$ - Daniel Martins Vieira
- $11297274$ - Diogo Castanho Emidio
- $11275193$ - Vinícius Eduardo De Araújo


## Bibliotecas, dados e funções que serão usadas

In [None]:
# Base directory of the data sets; each network is read from a path built by
# appending its own sub-directory to this string.
network_directory_paths = "../input/networksclassification/networks"

In [None]:
import community as community_louvain
import matplotlib.pyplot as plt
import networkx as nx
import seaborn as sns
import pandas as pd
import numpy as np
import math
import re

from networkx.algorithms.community import greedy_modularity_communities

from scipy.stats import moment
from scipy import stats

from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

In [None]:
# Preprocessing and classifier instances shared by the whole notebook; the
# analysis cells call fit() on these same objects again for every network.
scaler = StandardScaler()
# Two principal components, used for the 2-D scatter plots.
pca = PCA(2)
lda = LinearDiscriminantAnalysis()
knn = KNeighborsClassifier()
# NOTE(review): `model` is not referenced anywhere else in the visible file.
model = RandomForestClassifier()

In [None]:
def get_largest_component(network):
    """Return the largest connected component of `network` as a subgraph.

    Args:
        network: Graph accepted by `nx.connected_components`.

    Returns:
        The subgraph of `network` induced by the component with the most
        nodes. On ties, the first such component yielded by
        `nx.connected_components` is used.

    Raises:
        IndexError: If `network` has no nodes (and therefore no component).
    """
    # max() finds the largest component in a single pass; sorting every
    # component only to read the first one is unnecessary work.
    largest = max(nx.connected_components(network), key=len, default=None)
    if largest is None:
        raise IndexError("network has no nodes, so it has no connected component")

    return network.subgraph(largest)

In [None]:
# Signed integer or decimal with an optional exponent, e.g. "-3", "0.25", "1.5e-05".
_POSITION_NUMBER = re.compile(r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?")


def _parse_position(text):
    """Return every number found in `text` as a float64 array."""
    return np.array(_POSITION_NUMBER.findall(text), dtype=np.double)


def read_network_from_csv(path, edgeColumnsNames, nodesColumnsNames, sourceColumn = "source", targetColumn = "target", indexColumn = "index", edgeAttr = None, nodesAttr = None, pos = None, getLargestComponente = False):
    """Build a networkx graph from `<path>/edges.csv` and `<path>/nodes.csv`.

    Args:
        path: Directory containing `edges.csv` and `nodes.csv`.
        edgeColumnsNames: Column names assigned to the edge table, in file order.
        nodesColumnsNames: Column names assigned to the node table, in file order.
        sourceColumn: Edge column holding the source node id.
        targetColumn: Edge column holding the target node id.
        indexColumn: Node column holding the node id.
        edgeAttr: Edge column(s) copied onto the edges, or None.
        nodesAttr: List of node columns copied onto the nodes, or None.
        pos: Name of the node column holding the textual coordinates, or None.
        getLargestComponente: When true, keep only the largest connected component.

    Returns:
        A `(network, positions)` tuple. `positions` maps every node id of
        `nodes.csv` to a float64 array of its coordinates, or is None when
        `pos` is None. It is built from the whole node table, so it may hold
        entries for nodes removed by `getLargestComponente`.
    """
    edges = pd.read_csv("{}/edges.csv".format(path))
    nodes = pd.read_csv("{}/nodes.csv".format(path))

    # The files' own headers are replaced by the caller-supplied names.
    edges.columns = edgeColumnsNames
    nodes.columns = nodesColumnsNames

    network = nx.from_pandas_edgelist(edges, source=sourceColumn, target=targetColumn, edge_attr=edgeAttr)

    if nodesAttr is not None:
        # Pair each node id with the attributes of its own row. Looking rows up
        # positionally by node id is only correct when ids are 0..n-1 in file
        # order.
        attributes = nodes[nodesAttr].to_dict("records")
        network.add_nodes_from(zip(nodes[indexColumn], attributes))

    if getLargestComponente:
        network = get_largest_component(network)

    if pos is None:
        return network, None

    positions = {
        node_id: _parse_position(text)
        for node_id, text in zip(nodes[indexColumn], nodes[pos])
    }

    return network, positions

In [None]:
def calcular_momentum(network, n):
    """Return the n-th raw moment of the degree distribution of `network`.

    This is the mean of `degree ** n` over all nodes: `n = 1` gives the mean
    degree and `n = 2` the mean squared degree.
    """
    powered_degrees = (network.degree(node) ** n for node in network.nodes)

    return sum(powered_degrees) / len(network)

In [None]:
# Function taken from the class notebook
def modularity(G, communities):
    """Return the modularity Q of the partition `communities` of graph `G`.

    Computes Q = 1/(2M) * sum over node pairs (i, j) in the same community of
    (A[i, j] - ki * kj / (2M)), where M is the number of edges and ki, kj are
    the neighbour counts of i and j.

    Node labels are used directly as array positions, so this assumes the
    nodes are the integers 0..N-1 and that the adjacency matrix rows follow
    that order -- TODO confirm against callers.

    Args:
        G: networkx graph.
        communities: Indexable collection of node collections, one per community.

    Returns:
        The modularity value Q.
    """
    # c[node] holds the community label of each node. Nodes listed in no
    # community keep the initial 0, which is also the label of the first one.
    c = np.zeros(len(G.nodes()))
    nc = 0
    for k in range(0,len(communities)):
        c[sorted(communities[k])]=nc
        nc = nc+1

    A = nx.adjacency_matrix(G)
    N = len(G)
    M = G.number_of_edges()
    Q = 0
    # Visits every ordered pair (i, j): N * N iterations.
    for i in np.arange(0,N):
        ki = len(list(G.neighbors(i)))
        for j in np.arange(0,N):
            if(c[i]==c[j]):
                kj = len(list(G.neighbors(j)))
                Q = Q + A[i,j]-(ki*kj)/(2*M)
    Q = Q/(2*M)
    return Q

In [None]:
def average_shortest_path(network):
    """Return the mean, over connected components, of the average shortest path length.

    Every component counts once, whatever its size: the per-component values
    from `nx.average_shortest_path_length` are summed and divided by the
    number of components.
    """
    lengths = []
    for component in nx.connected_components(network):
        piece = network.subgraph(component).copy()
        lengths.append(nx.average_shortest_path_length(piece))

    return math.fsum(lengths) / len(lengths)

In [None]:
def measure(network, weight = None):
    """Return the feature vector used to classify `network`.

    Args:
        network: networkx graph to describe.
        weight: Accepted but not used by the current implementation.

    Returns:
        A list `[k1, k2, variance, clustering, path_length, assortativity]`:
        the first and second degree moments, the degree variance
        (`k2 - k1 ** 2`), the average clustering coefficient, the
        per-component average shortest path length and the degree
        assortativity coefficient.
    """
    first_moment = calcular_momentum(network, 1)
    second_moment = calcular_momentum(network, 2)

    features = [first_moment, second_moment, second_moment - first_moment ** 2]
    features.append(nx.average_clustering(network))
    features.append(average_shortest_path(network))
    features.append(nx.degree_assortativity_coefficient(network))

    return features

In [None]:
def generate_erdos_renyi(network, generate = 80):
    """Measure `generate` Erdos-Renyi graphs matched to `network`.

    Each random graph has as many nodes as `network` and an edge probability
    `p = <k> / (n - 1)`, so its expected mean degree equals the mean degree
    `<k>` of `network`.

    Args:
        network: Reference networkx graph.
        generate: Number of random graphs to create; the loop index is used as seed.

    Returns:
        A list with one row per random graph: the values returned by
        `measure` followed by the label "ErdosRenyi".
    """
    # Nothing to generate: return before touching the reference network.
    if generate <= 0:
        return []

    # These depend only on the reference network, so compute them once.
    n = len(network.nodes)
    p = calcular_momentum(network, 1) / (n - 1)

    all_measures = []
    for seed in range(generate):
        random_network = nx.gnp_random_graph(n, p, seed=seed, directed=False)
        all_measures.append(measure(random_network) + ["ErdosRenyi"])

    return all_measures

In [None]:
def generate_watts_strogatz(network, generate = 80, p = 0.1):
    """Measure `generate` Watts-Strogatz graphs matched to `network`.

    Each random graph has as many nodes as `network` and uses the integer
    part of the mean degree of `network` as the neighbourhood size `k`.

    Args:
        network: Reference networkx graph.
        generate: Number of random graphs to create; the loop index is used as seed.
        p: Rewiring probability passed to `nx.watts_strogatz_graph`.

    Returns:
        A list with one row per random graph: the values returned by
        `measure` followed by the label "WattsStrogatz".
    """
    # Nothing to generate: return before touching the reference network.
    if generate <= 0:
        return []

    # These depend only on the reference network, so compute them once.
    n = len(network.nodes)
    k = int(calcular_momentum(network, 1))

    all_measures = []
    for seed in range(generate):
        random_network = nx.watts_strogatz_graph(n, k, p, seed=seed)
        all_measures.append(measure(random_network) + ["WattsStrogatz"])

    return all_measures

In [None]:
def generate_barabasi_albert(network, generate = 80):
    """Measure `generate` Barabasi-Albert graphs matched to `network`.

    Each random graph has as many nodes as `network`. Every new node attaches
    with `int(<k> / 2)` edges, where `<k>` is the mean degree of `network`,
    but never fewer than one.

    Args:
        network: Reference networkx graph.
        generate: Number of random graphs to create; the loop index is used as seed.

    Returns:
        A list with one row per random graph: the values returned by
        `measure` followed by the label "BarabasiAlbert".
    """
    # Nothing to generate: return before touching the reference network.
    if generate <= 0:
        return []

    # These depend only on the reference network, so compute them once.
    n = len(network.nodes)
    # A mean degree below 2 would give 0 edges per new node, which
    # nx.barabasi_albert_graph rejects; 1 is the closest valid model.
    edges_per_node = max(1, int(calcular_momentum(network, 1) / 2))

    all_measures = []
    for seed in range(generate):
        random_network = nx.barabasi_albert_graph(n, edges_per_node, seed=seed)
        all_measures.append(measure(random_network) + ["BarabasiAlbert"])

    return all_measures

In [None]:
def generate_random_networks_df(networks):
    """Turn measurement rows into a DataFrame with named feature columns.

    Each row of `networks` must hold the six feature values followed by the
    model label, which ends up in the "class" column.
    """
    feature_names = ["k1", "k2", "k_variance", "average_clustering", "average_shortest_paths", "assortativity"]

    return pd.DataFrame(networks, columns = feature_names + ["class"])

## Classificação de redes sociais

As redes que serão usadas serão as seguintes:
- [American Revolutionary groups (1765-1783):](https://networks.skewed.de/net/revolution) A bipartite network of the memberships of notable people and organizations, from the American Revolution (1765-1783).
- [Game of Thrones coappearances:](https://networks.skewed.de/net/game_thrones) Network of coappearances of characters in the Game of Thrones series, by George R. R. Martin, and in particular coappearances in the book "A Storm of Swords." Nodes are unique characters, and edges are weighted by the number of times the two characters' names appeared within 15 words of each other in the text.
- [Maier Facebook friends (2014):](https://networks.skewed.de/net/facebook_friends) A small anonymized Facebook ego network, from April 2014. Nodes are Facebook profiles, and an edge exists if the two profiles are "friends" on Facebook. Metadata gives the social context for the relationship between ego and alter.

### American Revolutionary groups (1765-1783)

A bipartite network of the memberships of notable people and organizations, from the American Revolution (1765-1783).

In [None]:
# Load the American Revolutionary groups network. `pos` maps each node id to
# the coordinates parsed from the "pos" column of nodes.csv.
network, pos = read_network_from_csv(network_directory_paths + "/american_revolutioanry_groups", ["source", "target"], ["index", "pos"], edgeAttr = None, nodesAttr = None, pos = "pos")

# Show the graph object as the cell output.
network

In [None]:
# Draw the network at the node positions loaded from the data set.
plt.figure(figsize = (20, 20))

nx.draw_networkx_nodes(network, pos, node_size = 50, node_color = "darkred")
nx.draw_networkx_edges(network, pos, alpha = 0.4)

plt.show()

In [None]:
# Build the training set: feature rows of synthetic graphs from the three
# reference models (80 per model by default), each matched to `network`.
random_er = generate_erdos_renyi(network)
random_ws = generate_watts_strogatz(network)
random_ba = generate_barabasi_albert(network)

# One table with all rows; the last column ("class") names the model.
random_networks_df = generate_random_networks_df(random_er + random_ws + random_ba)

random_networks_df

In [None]:
# Every column except the last is a feature; the last one is the label.
x_columns = list(random_networks_df.columns[:-1])
y_columns = list(random_networks_df.columns[-1:])

x_columns, y_columns

In [None]:
# Features of the real network (`m`) and of the synthetic ones (`X`, with
# labels `y`).
m = measure(network)
X = random_networks_df[x_columns]
y = random_networks_df[y_columns].values.ravel()
# Reshape the feature list into a one-row DataFrame with the same columns as X.
m = pd.DataFrame(np.array(m).reshape(1,-1),columns=x_columns,dtype=np.double)
m

In [None]:
# The scaler is fitted on the synthetic rows plus the real network's row, so
# the real network also contributes to the fitted mean and variance.
features = pd.concat([X,m], axis=0)

transformer = scaler.fit(features)
# From here on X and m hold the standardized values.
X = pd.DataFrame(transformer.transform(X), columns=x_columns)
m = pd.DataFrame(transformer.transform(m), columns=x_columns)
m

In [None]:
# Train each classifier on the synthetic graphs and predict which generative
# model the real network is assigned to.
lda.fit(X, y)
print("LDA result:",lda.predict(m))

knn.fit(X, y)
print("KNN result:",knn.predict(m))

In [None]:
# Fit the PCA on the standardized synthetic rows plus the real network's row,
# then replace X and m by their projections (columns 0 and 1).
features = pd.concat([X,m], axis=0)
pca.fit(features)
print(pca.explained_variance_ratio_)
X = pd.DataFrame(pca.transform(X))
m = pd.DataFrame(pca.transform(m))

In [None]:
# Synthetic graphs coloured by generative model; the real network in red.
sns.scatterplot(x=X[0], y=X[1], hue=y)
sns.scatterplot(x=m[0], y=m[1], color='red')

In [None]:
# Absolute loading of each original feature on the two principal components.
print(abs(pca.components_ ))

### Game of Thrones coappearances

Network of coappearances of characters in the Game of Thrones series, by George R. R. Martin, and in particular coappearances in the book "A Storm of Swords." Nodes are unique characters, and edges are weighted by the number of times the two characters' names appeared within 15 words of each other in the text.

In [None]:
# Load the Game of Thrones network with the "weight" edge attribute and the
# "name" node attribute; `pos` maps node ids to the parsed coordinates.
network, pos = read_network_from_csv(network_directory_paths + "/game_of_thrones", ["source", "target", "weight"], ["index", "name", "pos"], edgeAttr = ["weight"], nodesAttr = ["name"], pos = "pos")

# Show the graph object as the cell output.
network

In [None]:
# Draw the network at the loaded positions, labelling every node with its
# "name" attribute and every edge with its "weight" attribute.
plt.figure(figsize = (20, 20))

nx.draw_networkx_nodes(network, pos, node_size = 50, node_color = "darkred")
nx.draw_networkx_edges(network, pos, alpha = 0.4)

node_names = {node: network.nodes[node]["name"] for node in network.nodes}
nx.draw_networkx_labels(
    network,
    pos,
    labels = node_names,
    font_color = "black",
    font_size = 10,
    font_weight = "bold",
    horizontalalignment = "center",
    verticalalignment = "bottom",
)

edge_weights = {edge: network.edges[edge]["weight"] for edge in network.edges}
nx.draw_networkx_edge_labels(network, pos, edge_labels = edge_weights, font_size = 6)

plt.show()

In [None]:
random_er = generate_erdos_renyi(network)
random_ws = generate_watts_strogatz(network)
random_ba = generate_barabasi_albert(network)

random_networks_df = generate_random_networks_df(random_er + random_ws + random_ba)

random_networks_df

In [None]:
m = measure(network)
X = random_networks_df[x_columns]
y = random_networks_df[y_columns].values.ravel()
m = pd.DataFrame(np.array(m).reshape(1,-1),columns=x_columns,dtype=np.double)
m

In [None]:
features = pd.concat([X,m], axis=0)

transformer = scaler.fit(features)
X = pd.DataFrame(transformer.transform(X), columns=x_columns)
m = pd.DataFrame(transformer.transform(m), columns=x_columns)
m

In [None]:
lda.fit(X, y)
print("LDA result:",lda.predict(m))

knn.fit(X, y)
print("KNN result:",knn.predict(m))

In [None]:
features = pd.concat([X,m], axis=0)
pca.fit(features)
print(pca.explained_variance_ratio_)
X = pd.DataFrame(pca.transform(X))
m = pd.DataFrame(pca.transform(m))

In [None]:
sns.scatterplot(x=X[0], y=X[1], hue=y)
sns.scatterplot(x=m[0], y=m[1], color='red')

In [None]:
print(abs(pca.components_ ))

### Maier Facebook friends (2014)

A small anonymized Facebook ego network, from April 2014. Nodes are Facebook profiles, and an edge exists if the two profiles are "friends" on Facebook. Metadata gives the social context for the relationship between ego and alter.

In [None]:
# Load the Facebook friends network with the "context_0" node attribute and
# keep only its largest connected component.
network, pos = read_network_from_csv(network_directory_paths + "/maier_facebook_friends", ["source", "target", "_graphml_edge_id", "name"], ["index", "_graphml_vertex_id", "context_0", "context_1", "context_2", "context_3", "pos"], edgeAttr = None, nodesAttr = ["context_0"], pos = "pos", getLargestComponente = True)

# Show the graph object as the cell output.
network

In [None]:
# Draw the network at the loaded positions, labelling every node with its
# "context_0" attribute.
plt.figure(figsize = (20, 20))

nx.draw_networkx_nodes(network, pos, node_size = 50, node_color = "darkred")
nx.draw_networkx_edges(network, pos, alpha = 0.4)

node_contexts = {node: network.nodes[node]["context_0"] for node in network.nodes}
nx.draw_networkx_labels(
    network,
    pos,
    labels = node_contexts,
    font_color = "black",
    font_size = 8,
    font_weight = "bold",
    horizontalalignment = "center",
    verticalalignment = "bottom",
    alpha = 0.6,
)

plt.show()

In [None]:
# Preview the raw feature vector of the real network; a later cell computes
# it again before building the DataFrame.
m = measure(network)

m

In [None]:
random_er = generate_erdos_renyi(network)
random_ws = generate_watts_strogatz(network)
random_ba = generate_barabasi_albert(network)

random_networks_df = generate_random_networks_df(random_er + random_ws + random_ba)

random_networks_df

In [None]:
x_columns = list(random_networks_df.columns[:-1])
y_columns = list(random_networks_df.columns[-1:])

x_columns, y_columns

In [None]:
m = measure(network)
X = random_networks_df[x_columns]
y = random_networks_df[y_columns].values.ravel()
m = pd.DataFrame(np.array(m).reshape(1,-1),columns=x_columns,dtype=np.double)
m

In [None]:
features = pd.concat([X,m], axis=0)

transformer = scaler.fit(features)
X = pd.DataFrame(transformer.transform(X), columns=x_columns)
m = pd.DataFrame(transformer.transform(m), columns=x_columns)
m

In [None]:
lda.fit(X, y)
print("LDA result:",lda.predict(m))

knn.fit(X, y)
print("KNN result:",knn.predict(m))

In [None]:
features = pd.concat([X,m], axis=0)
pca.fit(features)
print(pca.explained_variance_ratio_)
X = pd.DataFrame(pca.transform(X))
m = pd.DataFrame(pca.transform(m))

In [None]:
sns.scatterplot(x=X[0], y=X[1], hue=y)
sns.scatterplot(x=m[0], y=m[1], color='red')

In [None]:
print(abs(pca.components_ ))

## Classificação de redes biológicas

As redes que serão usadas serão as seguintes:
- [Metabolic network (C. elegans):](https://networks.skewed.de/net/celegans_metabolic) List of edges comprising the metabolic network of the nematode C. elegans.
- [Stelzl human interactome (2005):](https://networks.skewed.de/net/interactome_stelzl) A network of human proteins and their binding interactions. Nodes represent proteins and an edge represents an interaction between two proteins, as inferred via high-throughput Y2H experiments using bait and prey methodology.
- [Coulomb yeast interactome (2005):](https://networks.skewed.de/net/interactome_yeast) A network of protein-protein binding interactions among yeast proteins. Nodes represent proteins found in yeast (Saccharomyces cerevisiae) and an edge represents a binding interaction between two proteins.

### Metabolic network (C. elegans)

List of edges comprising the metabolic network of the nematode C. elegans.

In [None]:
# Load the C. elegans metabolic network with the "weight" edge attribute; no
# node attributes are attached (nodesAttr = None).
network, pos = read_network_from_csv(network_directory_paths + "/metabolic_network", ["source", "target", "_graphml_edge_id", "weight"], ["index", "_graphml_vertex_id", "id", "name", "x", "y", "z", "pos"], edgeAttr = ["weight"], nodesAttr = None, pos = "pos")

# Show the graph object as the cell output.
network

In [None]:
# Draw the network at the loaded node positions.
plt.figure(figsize = (20, 20))

nx.draw_networkx_nodes(network, pos, node_size = 50, node_color = "darkred")
nx.draw_networkx_edges(network, pos, alpha = 0.4)
# Node and edge label drawing is left disabled for this network.
#nx.draw_networkx_labels(network, pos, labels = dict([(key, network.nodes[key]["name"]) for key in network.nodes]), font_color = "black", font_size = 10, font_weight = "bold", horizontalalignment = "center", verticalalignment = "bottom")
#nx.draw_networkx_edge_labels(network, pos, edge_labels = dict([(key, network.edges[key]["weight"]) for key in network.edges]), font_size = 6)

plt.show()

In [None]:
random_er = generate_erdos_renyi(network)
random_ws = generate_watts_strogatz(network)
random_ba = generate_barabasi_albert(network)

random_networks_df = generate_random_networks_df(random_er + random_ws + random_ba)

random_networks_df

In [None]:
x_columns = list(random_networks_df.columns[:-1])
y_columns = list(random_networks_df.columns[-1:])

x_columns, y_columns

In [None]:
m = measure(network)
X = random_networks_df[x_columns]
y = random_networks_df[y_columns].values.ravel()
m = pd.DataFrame(np.array(m).reshape(1,-1),columns=x_columns,dtype=np.double)
m

In [None]:
features = pd.concat([X,m], axis=0)

transformer = scaler.fit(features)
X = pd.DataFrame(transformer.transform(X), columns=x_columns)
m = pd.DataFrame(transformer.transform(m), columns=x_columns)
m

In [None]:
lda.fit(X, y)
print("LDA result:",lda.predict(m))

knn.fit(X, y)
print("KNN result:",knn.predict(m))

In [None]:
features = pd.concat([X,m], axis=0)
pca.fit(features)
print(pca.explained_variance_ratio_)
X = pd.DataFrame(pca.transform(X))
m = pd.DataFrame(pca.transform(m))

In [None]:
sns.scatterplot(x=X[0], y=X[1], hue=y)
sns.scatterplot(x=m[0], y=m[1], color='red')

In [None]:
print(abs(pca.components_ ))

### Stelzl human interactome (2005)

A network of human proteins and their binding interactions. Nodes represent proteins and an edge represents an interaction between two proteins, as inferred via high-throughput Y2H experiments using bait and prey methodology.

In [None]:
# Load the Stelzl human interactome and keep only its largest connected
# component.
network, pos = read_network_from_csv(network_directory_paths + "/stelzl_human_interactome", ["source", "target"], ["index", "pos"], edgeAttr = None, nodesAttr = None, pos = "pos", getLargestComponente = True)

# Show the graph object as the cell output.
network

In [None]:
random_er = generate_erdos_renyi(network)
random_ws = generate_watts_strogatz(network)
random_ba = generate_barabasi_albert(network)

random_networks_df = generate_random_networks_df(random_er + random_ws + random_ba)

random_networks_df

In [None]:
x_columns = list(random_networks_df.columns[:-1])
y_columns = list(random_networks_df.columns[-1:])

x_columns, y_columns

In [None]:
m = measure(network)
X = random_networks_df[x_columns]
y = random_networks_df[y_columns].values.ravel()
m = pd.DataFrame(np.array(m).reshape(1,-1),columns=x_columns,dtype=np.double)
m

In [None]:
features = pd.concat([X,m], axis=0)

transformer = scaler.fit(features)
X = pd.DataFrame(transformer.transform(X), columns=x_columns)
m = pd.DataFrame(transformer.transform(m), columns=x_columns)
m

In [None]:
lda.fit(X, y)
print("LDA result:",lda.predict(m))

knn.fit(X, y)
print("KNN result:",knn.predict(m))

In [None]:
features = pd.concat([X,m], axis=0)
pca.fit(features)
print(pca.explained_variance_ratio_)
X = pd.DataFrame(pca.transform(X))
m = pd.DataFrame(pca.transform(m))

In [None]:
sns.scatterplot(x=X[0], y=X[1], hue=y)
sns.scatterplot(x=m[0], y=m[1], color='red')

In [None]:
print(abs(pca.components_ ))

### Coulomb yeast interactome (2005)

A network of protein-protein binding interactions among yeast proteins. Nodes represent proteins found in yeast (Saccharomyces cerevisiae) and an edge represents a binding interaction between two proteins.

In [None]:
# Load the Coulomb yeast interactome and keep only its largest connected
# component.
network, pos = read_network_from_csv(network_directory_paths + "/coulomb_yeast_interactome", ["source", "target"], ["index", "pos"], edgeAttr = None, nodesAttr = None, pos = "pos", getLargestComponente = True)

# Show the graph object as the cell output.
network

In [None]:
random_er = generate_erdos_renyi(network)
random_ws = generate_watts_strogatz(network)
random_ba = generate_barabasi_albert(network)

random_networks_df = generate_random_networks_df(random_er + random_ws + random_ba)

random_networks_df

In [None]:
x_columns = list(random_networks_df.columns[:-1])
y_columns = list(random_networks_df.columns[-1:])

x_columns, y_columns

In [None]:
m = measure(network)
X = random_networks_df[x_columns]
y = random_networks_df[y_columns].values.ravel()
m = pd.DataFrame(np.array(m).reshape(1,-1),columns=x_columns,dtype=np.double)
m

In [None]:
features = pd.concat([X,m], axis=0)

transformer = scaler.fit(features)
X = pd.DataFrame(transformer.transform(X), columns=x_columns)
m = pd.DataFrame(transformer.transform(m), columns=x_columns)
m

In [None]:
lda.fit(X, y)
print("LDA result:",lda.predict(m))

knn.fit(X, y)
print("KNN result:",knn.predict(m))

In [None]:
features = pd.concat([X,m], axis=0)
pca.fit(features)
print(pca.explained_variance_ratio_)
X = pd.DataFrame(pca.transform(X))
m = pd.DataFrame(pca.transform(m))

In [None]:
sns.scatterplot(x=X[0], y=X[1], hue=y)
sns.scatterplot(x=m[0], y=m[1], color='red')

In [None]:
print(abs(pca.components_ ))

## Classificação de redes tecnológicas

As redes que serão usadas serão as seguintes:
- [Air traffic control:](http://konect.cc/networks/maayan-faa/)  This network was constructed from the USA's FAA (Federal Aviation Administration) National Flight Data Center (NFDC), Preferred Routes Database. Nodes in this network represent airports or service centers and links are created from strings of preferred routes recommended by the NFDC.
- [Euroroads:](http://konect.cc/networks/subelj_euroroad/)  This is the international E-road network, a road network located mostly in Europe. The network is undirected; nodes represent cities and an edge between two nodes denotes that they are connected by an E-road.
- [Western US Power Grid:](https://networks.skewed.de/net/power) A network representing the Western States Power Grid of the United States, in which nodes are transformers or power relay points and two nodes are connected if a power line runs between them.

### Air traffic control

This network was constructed from the USA's FAA (Federal Aviation Administration) National Flight Data Center (NFDC), Preferred Routes Database. Nodes in this network represent airports or service centers and links are created from strings of preferred routes recommended by the NFDC.

In [None]:
# Read the air traffic control edge list, then keep only the largest
# connected component.
network = nx.read_edgelist(network_directory_paths + "/air_trafic_control/out.maayan-faa")
network = get_largest_component(network)

# Show the graph object as the cell output.
network

In [None]:
# This network is read from a plain edge list, without stored coordinates,
# so a spring layout is computed for the drawing.
pos = nx.spring_layout(network)

plt.figure(figsize = (20, 20))

nx.draw_networkx_nodes(network, pos, node_size = 50, node_color = "darkred")
nx.draw_networkx_edges(network, pos, alpha = 0.4)

plt.show()

In [None]:
random_er = generate_erdos_renyi(network)
random_ws = generate_watts_strogatz(network)
random_ba = generate_barabasi_albert(network)

random_networks_df = generate_random_networks_df(random_er + random_ws + random_ba)

random_networks_df

In [None]:
x_columns = list(random_networks_df.columns[:-1])
y_columns = list(random_networks_df.columns[-1:])

x_columns, y_columns

In [None]:
m = measure(network)
X = random_networks_df[x_columns]
y = random_networks_df[y_columns].values.ravel()
m = pd.DataFrame(np.array(m).reshape(1,-1),columns=x_columns,dtype=np.double)
m

In [None]:
features = pd.concat([X,m], axis=0)

transformer = scaler.fit(features)
X = pd.DataFrame(transformer.transform(X), columns=x_columns)
m = pd.DataFrame(transformer.transform(m), columns=x_columns)
m

In [None]:
lda.fit(X, y)
print("LDA result:",lda.predict(m))

knn.fit(X, y)
print("KNN result:",knn.predict(m))

In [None]:
features = pd.concat([X,m], axis=0)
pca.fit(features)
print(pca.explained_variance_ratio_)
X = pd.DataFrame(pca.transform(X))
m = pd.DataFrame(pca.transform(m))

In [None]:
sns.scatterplot(x=X[0], y=X[1], hue=y)
sns.scatterplot(x=m[0], y=m[1], color='red')

In [None]:
print(abs(pca.components_ ))

### Euroroad

This is the international E-road network, a road network located mostly in Europe. The network is undirected; nodes represent cities and an edge between two nodes denotes that they are connected by an E-road.


In [None]:
# Edge table (space separated) and city-name table ("|" separated).
edges = pd.read_csv(network_directory_paths + "/euroroad/out.subelj_euroroad_euroroad", delimiter = " ")
nodes = pd.read_csv(network_directory_paths + "/euroroad/ent.subelj_euroroad_euroroad.city.name", delimiter = "|")
# Give every city a 1-based id following its row order -- presumably because
# the node ids in the edge file start at 1; verify against the data.
nodes["index"] = nodes.index + 1

edges

In [None]:
# Show the city-name table as the cell output.
nodes

In [None]:
# Build the graph from the edge table and attach each city's name to the node
# carrying the same id.
network = nx.from_pandas_edgelist(edges, source = "source", target = "target", edge_attr = None)
properties = [(index, {"name": name}) for index, name in zip(nodes["index"], nodes["name"])]
network.add_nodes_from(properties)

# Only the largest connected component is analysed.
network = get_largest_component(network)

network

In [None]:
# No coordinates are loaded for this network, so lay it out with a spring
# layout and label every node with its "name" attribute.
pos = nx.spring_layout(network)

plt.figure(figsize = (20, 20))

nx.draw_networkx_nodes(network, pos, node_size = 50, node_color = "darkred")
nx.draw_networkx_edges(network, pos, alpha = 0.4)

city_names = {node: network.nodes[node]["name"] for node in network.nodes}
nx.draw_networkx_labels(
    network,
    pos,
    labels = city_names,
    font_color = "black",
    font_size = 8,
    font_weight = "bold",
    horizontalalignment = "center",
    verticalalignment = "bottom",
    alpha = 0.6,
)

plt.show()

In [None]:
random_er = generate_erdos_renyi(network)
random_ws = generate_watts_strogatz(network)
random_ba = generate_barabasi_albert(network)

random_networks_df = generate_random_networks_df(random_er + random_ws + random_ba)

random_networks_df

In [None]:
x_columns = list(random_networks_df.columns[:-1])
y_columns = list(random_networks_df.columns[-1:])

x_columns, y_columns

In [None]:
m = measure(network)
X = random_networks_df[x_columns]
y = random_networks_df[y_columns].values.ravel()
m = pd.DataFrame(np.array(m).reshape(1,-1),columns=x_columns,dtype=np.double)
m

In [None]:
features = pd.concat([X,m], axis=0)

transformer = scaler.fit(features)
X = pd.DataFrame(transformer.transform(X), columns=x_columns)
m = pd.DataFrame(transformer.transform(m), columns=x_columns)
m

In [None]:
lda.fit(X, y)
print("LDA result:",lda.predict(m))

knn.fit(X, y)
print("KNN result:",knn.predict(m))

In [None]:
features = pd.concat([X,m], axis=0)
pca.fit(features)
print(pca.explained_variance_ratio_)
X = pd.DataFrame(pca.transform(X))
m = pd.DataFrame(pca.transform(m))

In [None]:
sns.scatterplot(x=X[0], y=X[1], hue=y)
sns.scatterplot(x=m[0], y=m[1], color='red')

In [None]:
print(abs(pca.components_ ))

### Western US Power Grid

A network representing the Western States Power Grid of the United States, in which nodes are transformers or power relay points and two nodes are connected if a power line runs between them.

In [None]:
# Load the Western US power grid network; `pos` maps each node id to the
# coordinates parsed from the "pos" column of nodes.csv.
network, pos = read_network_from_csv(network_directory_paths + "/western_us_power_grid", ["source", "target"], ["index", "pos"], edgeAttr = None, nodesAttr = None, pos = "pos")

# Show the graph object as the cell output.
network

In [None]:
random_er = generate_erdos_renyi(network)
random_ws = generate_watts_strogatz(network)
random_ba = generate_barabasi_albert(network)

random_networks_df = generate_random_networks_df(random_er + random_ws + random_ba)

random_networks_df

In [None]:
x_columns = list(random_networks_df.columns[:-1])
y_columns = list(random_networks_df.columns[-1:])

x_columns, y_columns

In [None]:
m = measure(network)
X = random_networks_df[x_columns]
y = random_networks_df[y_columns].values.ravel()
m = pd.DataFrame(np.array(m).reshape(1,-1),columns=x_columns,dtype=np.double)
m

In [None]:
features = pd.concat([X,m], axis=0)

transformer = scaler.fit(features)
X = pd.DataFrame(transformer.transform(X), columns=x_columns)
m = pd.DataFrame(transformer.transform(m), columns=x_columns)
m

In [None]:
lda.fit(X, y)
print("LDA result:",lda.predict(m))

knn.fit(X, y)
print("KNN result:",knn.predict(m))

In [None]:
features = pd.concat([X,m], axis=0)
pca.fit(features)
print(pca.explained_variance_ratio_)
X = pd.DataFrame(pca.transform(X))
m = pd.DataFrame(pca.transform(m))

In [None]:
sns.scatterplot(x=X[0], y=X[1], hue=y)
sns.scatterplot(x=m[0], y=m[1], color='red')

In [None]:
print(abs(pca.components_ ))

# Conclusões

A partir dos nossos experimentos e predições, chegamos à conclusão de que o tipo da rede influencia significativamente na classificação dessa rede.

Como podemos notar nos gráficos gerados, o tipo da rede determinou muito quanto à predição dos algoritmos de classificação LDA e KNN.

Por exemplo, as redes sociais tenderam a ser classificadas como Barabási-Albert, dada a curta distância euclidiana entre a rede a ser classificada e as redes geradas.
Já nas redes biológicas, os resultados se mostraram bastante imprecisos, dada a grande distância entre a rede a ser classificada e as redes geradas.
Apesar de a distância ainda ser grande nas redes tecnológicas, as predições se mostraram mais precisas, visto que essas redes sempre estão mais próximas das redes geradas pelo modelo de Erdős-Rényi.