In [1]:
import pandas as pd
import numpy as np

In [2]:
DATA_PATH = 'data/soc-sign-bitcoinotc.csv'
ROWS_NUMBER = 10
RATING_LIMIT = 8
TOP_FIVE = 5
D_FACTOR = 0.15
COMPLEMENT_D_FACTOR = 1 - D_FACTOR

In [3]:
def create_investors_dict(rows, columns, investors):
    '''Creates a dictionary which correlates each source investor to all the target investors which the source has done a transaction'''
    investors_dict = {}
    for k in investors:
        investors_dict[k] = []
        
    for i in range(len(rows)):
        source_invstor, target_invstor = rows[i], columns[i]
        investors_dict[source_invstor].append(target_invstor)
    return investors_dict

In [4]:
def get_transition_matrix(rows, columns, investors):
    n_investors = len(investors)
    m = np.zeros(shape = (n_investors, n_investors))
    investors_dict = create_investors_dict(rows, columns, investors)
        
    for i in range(n_investors):
        source = investors[i]
        for j in range(n_investors):
            target = investors[j]
            if target in investors_dict[source]: 
                m[j][i] = 1.0/len(investors_dict[source])
    
    return np.matrix(m)

In [5]:
def pagerank(v, m):
    global count
    count += 1
    if (sum(abs((m * v) - v)) > 0.001):
        return pagerank(m * v, m)
    return m * v

In [6]:
df = pd.read_csv(DATA_PATH, encoding = 'utf-8')
df = df.replace(np.NAN, "")

In [7]:
df.head(n=ROWS_NUMBER)

Unnamed: 0,6,2,4,1289241911.72836
0,6,5,2,1289242000.0
1,1,15,1,1289243000.0
2,4,3,7,1289245000.0
3,13,16,8,1289254000.0
4,13,10,8,1289254000.0
5,7,5,1,1289363000.0
6,2,21,5,1289371000.0
7,2,20,5,1289371000.0
8,21,2,5,1289381000.0
9,21,1,8,1289441000.0


In [8]:
df.columns = ['source', 'target', 'rating', 'time']

In [9]:
df.head(n=ROWS_NUMBER)

Unnamed: 0,source,target,rating,time
0,6,5,2,1289242000.0
1,1,15,1,1289243000.0
2,4,3,7,1289245000.0
3,13,16,8,1289254000.0
4,13,10,8,1289254000.0
5,7,5,1,1289363000.0
6,2,21,5,1289371000.0
7,2,20,5,1289371000.0
8,21,2,5,1289381000.0
9,21,1,8,1289441000.0


In [10]:
df = df[(df.rating >= RATING_LIMIT)]

In [11]:
df.head(n=ROWS_NUMBER)

Unnamed: 0,source,target,rating,time
3,13,16,8,1289254000.0
4,13,10,8,1289254000.0
9,21,1,8,1289441000.0
10,21,10,8,1289441000.0
11,21,8,9,1289441000.0
15,10,1,8,1289556000.0
17,10,21,8,1289556000.0
19,10,25,10,1289556000.0
40,13,1,8,1290521000.0
51,1,17,9,1290969000.0


In [12]:
source = list(df.source)
target = list(df.target)

In [13]:
investors = list(set(source) | set(list(target)))
n_investors = len(investors)

a = get_transition_matrix(source, target, investors)
b = (1.0/n_investors)*np.matrix(np.ones((n_investors,n_investors), dtype=int))
m = (COMPLEMENT_D_FACTOR*a) + (D_FACTOR*b) 
v = (1.0/n_investors)* np.matrix(np.ones((n_investors,1), dtype=int))


pr = pagerank(v, m)
r_df = pd.DataFrame({'investor': investors, 'investor_rank': [cell.item(0,0) for cell in pr]})

NameError: name 'count' is not defined

In [None]:
r_df.head(n=ROWS_NUMBER)

## Respostas & Respostas

### 01. Quantas iterações o PageRank precisou rodar até atingir convergência?

In [None]:
print(count)

### 02. Quais os 5 investidores mais importantes segundo o PageRank? Quais seus valores de PageRank?

In [None]:
r_df.sort_values(by=['investor_rank']).head(n=TOP_FIVE)

### 03. Como você poderia usar o PageRank caso você fosse um investidor em bitcoins?

 Visto que eu consigo obter a lista dos investidores mais importantes, eu faria investimento neles.