In [None]:
!pip install plotly
!pip install pandas
!pip install pytz

# t-SNE clustering of your LinkedIn network

## Use your own `Connections.csv`, downloaded from the LinkedIn settings page

In [None]:
# pandas handles loading and encoding the connections table.
import pandas as pd
# NOTE(review): plotly express / offline are imported here but not used by the
# cells visible in this part of the notebook — confirm they are needed later.
import plotly.express as px
import plotly.offline as pyo
# Set up plotly's offline mode so figures can render inside the notebook.
pyo.init_notebook_mode()

# Prepare data

In [None]:
# Load the connections export and keep only rows that have both a company and
# a position, since both columns are needed by the encoding cells below.
df = pd.read_csv('Connections.csv')
df = (
    df.dropna(subset=['Company', 'Position'])
    # Use the `columns=` keyword: passing the axis positionally
    # (`drop('Email Address', 1)`) raises TypeError on pandas >= 2.0.
    .drop(columns='Email Address')
)
# Order the rows by the first word of the position title, via a temporary
# helper column that is removed again right after sorting.
df['sort_val'] = df['Position'].str.split(" ").str[0]
df = df.sort_values(by='sort_val').drop(columns='sort_val')
df.head(n=10)

# One-Hot encoding

In [None]:
# One-hot encode both nominal columns together (first level of each dropped).
encode_cols = ['Company', 'Position']
nominal_features = pd.get_dummies(
    df.loc[:, encode_cols],
    prefix=['Company', 'Position'],
    drop_first=True,
)

# Labels: the Position of every connection, as an array independent of `df`.
y = df['Position'].copy().values

# Features: one-hot encoded Company only (first level dropped).
X = df.loc[:, ['Company']].copy()
X_d = pd.get_dummies(X, drop_first=True)
X_d.head()

# t-SNE embedding of the Company features, colored by Position

In [None]:
from sklearn.manifold import TSNE
import seaborn as sns
import matplotlib.pyplot as plt

# We want a t-SNE embedding with 2 dimensions so it can be drawn as a scatter plot.
n_components = 2
# t-SNE is stochastic: fix the seed so that re-running the notebook
# (Restart & Run All) reproduces the same embedding and therefore the same plot.
tsne = TSNE(n_components=n_components, random_state=42)
# Embed the one-hot encoded Company features; one output row per connection.
tsne_result = tsne.fit_transform(X_d)
tsne_result.shape

In [None]:
# Scatter plot of the 2-D t-SNE embedding, one point per connection, colored by
# its Position label. Everything after the DataFrame is purely cosmetic.
tsne_result_df = pd.DataFrame(
    tsne_result[:, :2], columns=['tsne_1', 'tsne_2']
).assign(label=y)

fig, ax = plt.subplots(1, figsize=(30, 30))
sns.scatterplot(data=tsne_result_df, x='tsne_1', y='tsne_2', hue='label', s=120, ax=ax)

# Same limits on both axes (overall min/max of the embedding, padded by 5)
# together with an equal aspect ratio, so distances look the same in x and y.
lim = (tsne_result.min() - 5, tsne_result.max() + 5)
ax.set(xlim=lim, ylim=lim, aspect='equal')

# Anchor the legend just outside the right-hand edge of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0)