# **Network Exploration**

## **Imports**

In [1]:
import os
import sys
from pathlib import Path
from warnings import filterwarnings
from dotenv import load_dotenv
import tweepy
import numpy as np
import pandas as pd
import ast

# Load variables from a local .env file into the process environment so the
# credential look-ups below can see them.
load_dotenv()
# Suppress all warnings for the rest of the session.
filterwarnings("ignore")

# Current working directory of the kernel, and the directory two levels above it.
PATH = os.getcwd()
PROJECT = str(Path(PATH).parents[1])

# Twitter API credentials; each one is None when the variable is not set.
TWITTER_USERNAME = os.environ.get("TWITTER_USERNAME")
TWITTER_API_KEY = os.environ.get("TWITTER_API_KEY")
TWITTER_API_SECRET = os.environ.get("TWITTER_API_SECRET")
TWITTER_API_BEARER_TOKEN = os.environ.get("TWITTER_API_BEARER_TOKEN")

# Bearer-token client that waits when the rate limit is reached.
client = tweepy.Client(
    bearer_token=TWITTER_API_BEARER_TOKEN,
    wait_on_rate_limit=True,
)

## **Read**

In [2]:
# One row per user, with the accounts they follow stored as a list literal in text form.
df = pd.read_csv("data/users_followers.csv")
# Parse the textual list literals back into real Python lists.
df["following"] = df["following"].apply(ast.literal_eval)
df.head()

Unnamed: 0,user,following
0,1210565171107815429,"[1281443510, 632966383, 27631809, 14922097, 22..."
1,3432386097,"[33836629, 188343397, 235684766, 17268874, 439..."
2,1120633726478823425,[]
3,4398626122,[]
4,48008938,"[1464797992872034304, 630995607, 27493883, 123..."


In [3]:
# Reshape to one row per (user, followed account) pair. The original row index
# is repeated for every element; users with an empty list keep a single row
# whose `following` value is NaN.
df = df.explode(column="following")
df

Unnamed: 0,user,following
0,1210565171107815429,1281443510
0,1210565171107815429,632966383
0,1210565171107815429,27631809
0,1210565171107815429,14922097
0,1210565171107815429,228132689
...,...,...
128,620047515,9624742
128,620047515,1967216306
128,620047515,73238146
128,620047515,11134252


## **Play with Torch**

In [13]:
import torch
from torch_geometric.data import Data
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.datasets import Planetoid

# Toy path graph 0 - 1 - 2. Every undirected edge is listed in both directions
# as a (source, target) pair, then transposed into the [2, num_edges] layout.
edge_index = torch.tensor(
    [[0, 1], [1, 0], [1, 2], [2, 1]],
    dtype=torch.long,
).t().contiguous()
# A single scalar feature per node, shaped [num_nodes, 1].
x = torch.tensor([-1, 0, 1], dtype=torch.float).unsqueeze(-1)

data = Data(x=x, edge_index=edge_index)
data

Data(x=[3, 1], edge_index=[2, 4])

`data.x`: Node feature matrix with shape `[num_nodes, num_node_features]`

`data.edge_index`: Graph connectivity in COO format with shape `[2, num_edges]` and type `torch.long`

`data.edge_attr`: Edge feature matrix with shape `[num_edges, num_edge_features]`

`data.y`: Target to train against (may have arbitrary shape), e.g., node-level targets of shape `[num_nodes, *]` or graph-level targets of shape `[1, *]`

`data.pos`: Node position matrix with shape `[num_nodes, num_dimensions]`

In [8]:
# `Data.keys` is a property on older torch_geometric releases and a method on
# newer ones. Calling it only when it is callable prints the attribute names in
# both cases instead of a bound-method repr.
print(data.keys() if callable(data.keys) else data.keys)
# Attributes can also be read with dictionary-style access.
print(data['x'])

['x', 'edge_index']
tensor([[-1.],
        [ 0.],
        [ 1.]])


In [14]:
# Cora dataset from the Planetoid collection, stored under `root`.
dataset = Planetoid(name='Cora', root='/tmp/Cora')

In [15]:
class GCN(torch.nn.Module):
    """Two-layer graph convolutional network that outputs per-node log-probabilities.

    Args:
        in_channels: Size of each input node feature vector. When omitted,
            ``dataset.num_node_features`` of the notebook-level ``dataset`` is used.
        hidden_channels: Width of the hidden representation (default 16).
        out_channels: Number of output classes. When omitted,
            ``dataset.num_classes`` of the notebook-level ``dataset`` is used.
        dropout: Dropout probability applied between the two layers
            (default 0.5, the default of ``F.dropout``).
    """

    def __init__(self, in_channels=None, hidden_channels=16, out_channels=None, dropout=0.5):
        super().__init__()
        # Fall back to the global dataset only when sizes are not given, so
        # `GCN()` keeps working while other graphs can pass explicit sizes.
        if in_channels is None:
            in_channels = dataset.num_node_features
        if out_channels is None:
            out_channels = dataset.num_classes
        self.dropout = dropout
        self.conv_1 = GCNConv(in_channels, hidden_channels)
        self.conv_2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Run both convolutions on ``data.x`` / ``data.edge_index``.

        Returns:
            Log-softmax scores over dimension 1 (one row per node).
        """
        x, edge_index = data.x, data.edge_index
        x = self.conv_1(x, edge_index)
        x = F.relu(x)
        # Dropout is active only in training mode (`self.training`).
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv_2(x, edge_index)
        return F.log_softmax(x, dim=1)

In [20]:
# Seed the RNG so weight initialisation and dropout masks repeat across runs.
# NOTE(review): some GPU kernels may still be non-deterministic — confirm if
# bit-exact results are required.
torch.manual_seed(42)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN().to(device)
# The dataset holds a single graph; move it to the same device as the model.
data = dataset[0].to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# Full-batch training: every epoch runs the whole graph through the model but
# computes the loss on the training nodes only.
model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

# Evaluation: switch off dropout and skip autograd bookkeeping for the forward pass.
model.eval()
with torch.no_grad():
    pred = model(data).argmax(dim=1)
correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
acc = int(correct) / int(data.test_mask.sum())
print(f'Accuracy: {acc:.4f}')

Accuracy: 0.7980
