# Load trained model

In [1]:
from sklearn.neural_network import MLPClassifier
import os

# Root data directory; the model, feature and output paths below are all joined onto it.
# NOTE(review): absolute, machine-specific path -- change it here when running elsewhere.
data_dirpath = '/usr2/mamille2/tumblr/data/sample1k'

In [2]:
import pickle

# Unpickle the trained model that the following cells inspect.
# NOTE: pickle.load can execute arbitrary code -- only open files you produced or trust.
model_path = os.path.join(
    data_dirpath,
    'models',
    'ffn_labels_baseline+exp1+exp2_all.pkl',
)
with open(model_path, 'rb') as model_file:
    model = pickle.load(model_file)

# Load training data

In [3]:
# The feature pickle is unpacked into four objects: train/test inputs and labels.
data_fpath = os.path.join(
    data_dirpath,
    'output',
    'features',
    'ffn_labels_baseline+exp1+exp2_all_features.pkl',
)
with open(data_fpath, 'rb') as features_file:
    X_train, y_train, X_test, y_test = pickle.load(features_file)

# Sanity check: (n_samples, n_features) of the training matrix
print(X_train.shape)

(641403, 17775)


# Forward pass of training data through the weights and non-linearities

In [4]:
# One weight matrix per pair of consecutive layers; print each matrix's shape.
for weight_matrix in model.coefs_:
    print(weight_matrix.shape)

(17775, 100)
(100, 32)
(32, 50)
(50, 1)


In [5]:
# One bias vector per non-input layer; print each vector's shape.
for bias_vector in model.intercepts_:
    print(bias_vector.shape)

(100,)
(32,)
(50,)
(1,)


In [6]:
import numpy as np

def relu(x):
    """Rectified linear unit: element-wise ``max(x, 0)``.

    Parameters
    ----------
    x : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    numpy scalar or ndarray
        Same shape as ``x`` with negative entries replaced by 0. Float input
        stays float.
    """
    return np.maximum(x, 0)


# Kept under its original name for the cells that call it.
# The previous np.vectorize(relu) inferred the output dtype from the FIRST
# element only: when that element was <= 0, Python's max(0, x) returned the
# int 0 and every later activation was truncated to an integer. It also ran
# a Python-level loop per element. np.maximum is a true ufunc with neither
# problem, and it accepts empty arrays as well.
relu_vec = relu

In [7]:
from scipy.sparse import csr_matrix

# Post-ReLU activations of every hidden layer, one (n_samples, layer_width)
# array per layer. The loop below stops before the last weight matrix, so the
# output unit is NOT included: once stacked this is 100 + 32 + 50 = 182 columns.
# NOTE(review): assumes the model was trained with ReLU hidden activations -- confirm.
neuron_values = []

# Input -> first hidden layer.
# X_train.dot(...) replaces the unbound csr_matrix.dot(X_train, ...) call, so it
# works for any scipy sparse format or a dense array.
# np.maximum(..., 0) is an element-wise ReLU that keeps the float dtype; an
# np.vectorize'd max(0, x) infers its dtype from the first element and can
# silently truncate every activation to an integer.
neuron_values.append(np.maximum(X_train.dot(model.coefs_[0]) + model.intercepts_[0], 0))

# Remaining hidden layers: each consumes the previous layer's activations.
for i in range(1, len(model.coefs_)-1):
    print(i)
    layer_values = np.maximum(np.dot(neuron_values[i-1], model.coefs_[i]) + model.intercepts_[i], 0)
    neuron_values.append(layer_values)
    print(type(layer_values))
    print(layer_values.shape)
    print()

# Number of hidden layers collected
print(len(neuron_values))

1
<class 'numpy.ndarray'>
(641403, 32)

2
<class 'numpy.ndarray'>
(641403, 50)

3


In [8]:
# Join the per-layer activation arrays column-wise into a single table
# with one row per sample and one column per hidden unit.
neuron_values_arr = np.concatenate(neuron_values, axis=1)
neuron_values_arr.shape

(641403, 182)

In [13]:
# Persist the activation table in NumPy's binary .npy format
pathways_fpath = os.path.join(
    data_dirpath,
    'neural_pathways',
    'ffn_labels_baseline+exp1+exp2_pathways.npy',
)
np.save(pathways_fpath, neuron_values_arr)

# Predictions, actual

In [12]:
# Number of gold (actual) training labels; this cell only displays the count,
# the labels themselves are written to disk by the np.savetxt cell that follows.
len(y_train)

641403

In [14]:
# Write the gold training labels to a plain-text file
train_actual_fpath = os.path.join(
    data_dirpath,
    'output',
    'predictions',
    'ffn_labels_baseline+exp1+exp2_train_actual.txt',
)
np.savetxt(train_actual_fpath, y_train)

# Linear probes

## Follower ID

In [5]:
import pandas as pd
import os
from sklearn.model_selection import train_test_split

data_dirpath = '/usr2/mamille2/tumblr/data/sample1k'

# Read the reblog feature table
feature_fpath = os.path.join(data_dirpath, 'feature_tables', 'reblog_features.csv')
features = pd.read_csv(feature_fpath)

# Fixed random_state keeps the 90/10 split reproducible across runs
train, test = train_test_split(features, random_state=12345, test_size=0.1)

# Follower IDs of the training rows only, kept as a one-column frame
follower_ids = train[['tumblog_id_follower']]

# Row count; compare with the number of rows in the training feature matrix
print(len(follower_ids))

follower_ids_fpath = os.path.join(
    data_dirpath,
    'output',
    'features',
    'ffn_labels_baseline+exp1+exp2_follower_ids.csv',
)
follower_ids.to_csv(follower_ids_fpath, index=False)

641403
