In [1]:
# Load dependencies for this Jupyter Notebook
import pandas as pd
import time
import numpy as np
from functools import reduce
from lib.util import fetch_thread

import matplotlib.pyplot as plt

#latent-factor modeling:
from sklearn.decomposition import PCA,SparsePCA,KernelPCA
from sklearn.manifold import TSNE, Isomap

### Read the thread-level feature CSV of each event and separate the `is_rumor` label from the feature data:

In [2]:
# Events whose thread-level features are loaded below.
events = [
    "germanwings-crash",
    "sydneysiege",
    "ottawashooting",
    "ferguson",
    "charliehebdo",
]

# Per-event store: event name -> {'X': feature matrix, 'y': label vector},
# both kept as plain NumPy arrays.
events_threads = {}
for event in events:
    X, y = fetch_thread(event)
    # Discard every feature column that contains at least one NaN.
    # NOTE(review): columns are dropped independently for each event and the
    # column names are lost by `.values`, so merge_datasets relies on all
    # events ending up with the same columns in the same order -- confirm.
    X = X.dropna(axis=1)
    events_threads[event] = {'X': X.values, 'y': y.values}


### Data Preprocessing:

In [27]:
def merge_datasets(events_to_merge, threads=None):
    """Stack the features and labels of several events into one dataset.

    Parameters
    ----------
    events_to_merge : sequence of str
        Keys of the events to combine, in stacking order. Must contain at
        least one event (the first one fixes the expected feature count).
    threads : dict, optional
        Mapping ``event -> {'X': 2-D array, 'y': 1-D array}``. When omitted,
        the notebook-level ``events_threads`` dictionary is used.

    Returns
    -------
    X : ndarray
        Row-wise concatenation of the events' feature matrices.
    y : ndarray
        Concatenation of the events' label vectors.
    ytag : ndarray
        For every row, the position (0, 1, 2, ...) of its source event in
        ``events_to_merge``, stored as floats.

    Notes
    -----
    Prints the shapes of ``X`` and ``y`` as a quick sanity check.
    """
    if threads is None:
        threads = events_threads

    d = threads[events_to_merge[0]]['X'].shape[1]

    # Each list starts with an empty float array so the merged result keeps
    # the float dtype and the (0, d) shape check of the first event, while
    # the actual copy happens only once per output array instead of once per
    # loop iteration.
    X_parts = [np.zeros((0, d))]
    y_parts = [np.zeros((0))]
    ytag_parts = [np.zeros((0))]
    for i, event in enumerate(events_to_merge):
        X_event = threads[event]['X']
        y_event = threads[event]['y']
        X_parts.append(X_event)
        y_parts.append(y_event)
        ytag_parts.append(np.full(y_event.shape, float(i)))

    X = np.concatenate(X_parts, axis=0)
    y = np.concatenate(y_parts, axis=0)
    ytag = np.concatenate(ytag_parts, axis=0)
    print(X.shape, y.shape)
    return X, y, ytag

In [4]:
# Build the working dataset from the 'ferguson' event only.
X, y, ytag = merge_datasets(['ferguson'])

(1010, 111) (1010,)


In [5]:
### PCA: project X onto two principal components, colored by the label vector y.

model = PCA(n_components=2)
Z_PCA = model.fit(X).transform(X)  # fit on X, then project the same rows

fig, ax = plt.subplots()
ax.set_title("PCA")
ax.scatter(Z_PCA[:, 0], Z_PCA[:, 1], c=y)
plt.show()

In [6]:
### TSNE: two-dimensional t-SNE embedding of X, colored by the label vector y.

# t-SNE is stochastic: pin the seed so that re-running the cell (or the whole
# notebook) reproduces the same embedding and figure.
model = TSNE(n_components=2, random_state=0)
Z_TSNE = model.fit_transform(X)
plt.figure()
plt.title("TSNE")
plt.scatter(Z_TSNE[:, 0], Z_TSNE[:, 1], c=y)
plt.show()

In [7]:
### Isomap: two-dimensional Isomap embedding of X (4 neighbors), colored by the label vector y.

model = Isomap(n_neighbors=4, n_components=2)
Z_Isomap = model.fit_transform(X)

fig, ax = plt.subplots()
ax.set_title("Isomap")
ax.scatter(Z_Isomap[:, 0], Z_Isomap[:, 1], c=y)
plt.show()

In [8]:
### SparsePCA: two sparse components of X, colored by the label vector y.

# `normalize_components` is no longer passed: scikit-learn deprecated it in
# 0.22 (components are always normalized since then) and removed it in 0.24,
# so supplying it raises a TypeError on current releases.
# NOTE(review): on scikit-learn 0.20/0.21 the default was un-normalized
# components -- confirm the installed version is >= 0.22.
model = SparsePCA(n_components=2)
model.fit(X)
# The result is stored under the name Z_PCA, as in the original cell.
Z_PCA = model.transform(X)
plt.figure()
plt.title("SparsePCA")
plt.scatter(Z_PCA[:, 0], Z_PCA[:, 1], c=y)
plt.show()

In [9]:
### KernelPCA: two components of X using KernelPCA's default kernel, colored by the label vector y.

model = KernelPCA(n_components=2)
Z_PCA = model.fit(X).transform(X)  # result is stored under the name Z_PCA

fig, ax = plt.subplots()
ax.set_title("KernelPCA")
ax.scatter(Z_PCA[:, 0], Z_PCA[:, 1], c=y)
plt.show()

## Latent-factor models for the whole dataset (all events merged):

In [28]:
# Build the working dataset from every event listed in `events`.
X, y, ytag = merge_datasets(events)

(5447, 111) (5447,)


In [29]:
### PCA: project X onto two principal components, colored by source-event tag (ytag).

model = PCA(n_components=2)
Z_PCA = model.fit(X).transform(X)  # fit on X, then project the same rows

fig, ax = plt.subplots()
ax.set_title("PCA")
ax.scatter(Z_PCA[:, 0], Z_PCA[:, 1], c=ytag)
plt.show()

In [30]:
### TSNE: two-dimensional t-SNE embedding of X, colored by source-event tag (ytag).

# t-SNE is stochastic: pin the seed so that re-running the cell (or the whole
# notebook) reproduces the same embedding and figure.
model = TSNE(n_components=2, random_state=0)
Z_TSNE = model.fit_transform(X)
plt.figure()
plt.title("TSNE")
plt.scatter(Z_TSNE[:, 0], Z_TSNE[:, 1], c=ytag)
plt.show()

In [31]:
### Isomap: two-dimensional Isomap embedding of X (4 neighbors), colored by source-event tag (ytag).

model = Isomap(n_neighbors=4, n_components=2)
Z_Isomap = model.fit_transform(X)

fig, ax = plt.subplots()
ax.set_title("Isomap")
ax.scatter(Z_Isomap[:, 0], Z_Isomap[:, 1], c=ytag)
plt.show()

In [32]:
### SparsePCA: two sparse components of X, colored by source-event tag (ytag).

# `normalize_components` is no longer passed: scikit-learn deprecated it in
# 0.22 (components are always normalized since then) and removed it in 0.24,
# so supplying it raises a TypeError on current releases.
# NOTE(review): on scikit-learn 0.20/0.21 the default was un-normalized
# components -- confirm the installed version is >= 0.22.
model = SparsePCA(n_components=2)
model.fit(X)
# The result is stored under the name Z_PCA, as in the original cell.
Z_PCA = model.transform(X)
plt.figure()
plt.title("SparsePCA")
plt.scatter(Z_PCA[:, 0], Z_PCA[:, 1], c=ytag)
plt.show()

In [33]:
### KernelPCA: two components of X using KernelPCA's default kernel, colored by source-event tag (ytag).

model = KernelPCA(n_components=2)
Z_PCA = model.fit(X).transform(X)  # result is stored under the name Z_PCA

fig, ax = plt.subplots()
ax.set_title("KernelPCA")
ax.scatter(Z_PCA[:, 0], Z_PCA[:, 1], c=ytag)
plt.show()