# Adding a New Dating Profile
Using Classification or Clustering for a New Dating Profile

### Importing Libraries and Data

In [1]:
import pandas as pd
# Let text columns show up to 500 characters before pandas truncates them
pd.options.display.max_colwidth = 500
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import _pickle as pickle
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import calinski_harabaz_score, silhouette_score, davies_bouldin_score
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm_notebook as tqdm

#### Loading the Profiles

In [2]:
# Read the cleaned profiles DataFrame back from its pickle file.
# Pickle files can execute code when loaded, so only open trusted files.
with open("profiles.pkl", mode='rb') as profiles_file:
    raw_df = pickle.load(profiles_file)

# Preview the first five rows
raw_df.head()

Unnamed: 0,Bios,Movies,TV,Religion,Music,Sports,Books,Politics
0,Typical twitter fanatic. Infuriatingly humble thinker. Lifelong coffee practitioner. Organizer.,5,3,4,1,3,6,7
1,Web junkie. Analyst. Infuriatingly humble introvert. Food nerd. Lifelong music fanatic. Coffee lover.,7,9,5,1,9,4,0
2,Avid web maven. Food practitioner. Gamer. Twitter fanatic. Pop culture scholar. Zombie evangelist.,1,2,6,5,6,5,4
3,Twitteraholic. Extreme web fanatic. Food buff. Infuriatingly humble entrepreneur.,5,2,7,8,2,6,6
4,Bacon enthusiast. Falls down a lot. Freelance social media fan. Infuriatingly humble introvert.,6,6,6,4,3,6,3


#### Loading the Clustered Profiles

In [3]:
# Read the already-clustered profiles DataFrame back from its pickle file.
# Pickle files can execute code when loaded, so only open trusted files.
with open("clustered_profiles.pkl", mode='rb') as clustered_file:
    cluster_df = pickle.load(clustered_file)

# Preview the first five rows
cluster_df.head()

Unnamed: 0,Bios,Movies,TV,Religion,Music,Sports,Books,Politics,Cluster #
0,Typical twitter fanatic. Infuriatingly humble thinker. Lifelong coffee practitioner. Organizer.,5.0,3.0,4.0,1.0,3.0,6.0,7.0,9
1,Web junkie. Analyst. Infuriatingly humble introvert. Food nerd. Lifelong music fanatic. Coffee lover.,7.0,9.0,5.0,1.0,9.0,4.0,0.0,9
2,Avid web maven. Food practitioner. Gamer. Twitter fanatic. Pop culture scholar. Zombie evangelist.,1.0,2.0,6.0,5.0,6.0,5.0,4.0,1
3,Twitteraholic. Extreme web fanatic. Food buff. Infuriatingly humble entrepreneur.,5.0,2.0,7.0,8.0,2.0,6.0,6.0,9
4,Bacon enthusiast. Falls down a lot. Freelance social media fan. Infuriatingly humble introvert.,6.0,6.0,6.0,4.0,3.0,6.0,3.0,10


## Creating the New Profile Data

In [12]:
# Number of values in the first profile row once the first column is skipped
raw_df.iloc[0, 1:].size

7

In [31]:
# Build a one-row DataFrame for the new user: a random 0-9 score for every
# non-bio column plus a free-text bio typed in by the user.
# NOTE: no random seed is set in this cell, so the scores differ on each run.

# Every column except the free-text 'Bios' column receives a random score.
# Selecting by name rather than position keeps this correct if the column
# order of raw_df ever changes.
score_columns = raw_df.columns.drop('Bios')
random_scores = np.random.randint(0, 10, len(score_columns))  # upper bound is exclusive

print("Enter new profile information...\n\nExample Bio:\nBacon enthusiast. Falls down a lot. Freelance social media fan. Infuriatingly humble introvert.")

# Drop stray leading/trailing whitespace from the typed bio
bio_text = input("Enter a Bio for yourself: ").strip()

# Assemble the row in a single constructor call instead of mutating an empty
# frame column by column. The explicit index places the new profile directly
# after the last existing one, and `columns=` keeps raw_df's column order.
new_profile = pd.DataFrame(
    {'Bios': bio_text, **dict(zip(score_columns, random_scores))},
    index=[raw_df.index[-1] + 1],
    columns=raw_df.columns,
)

Enter new profile information...

Example Bio:
Bacon enthusiast. Falls down a lot. Freelance social media fan. Infuriatingly humble introvert.
Enter a Bio for yourself: food lover. social media fanatic. extraordinarily humble. life lover.


In [32]:
new_profile

Unnamed: 0,Bios,Movies,TV,Religion,Music,Sports,Books,Politics
6600,food lover. social media fanatic. extraordinarily humble. life lover.,4,9,3,6,6,0,3


# Two Approaches
1. Cluster all the profiles again with the new profile

2. Classify the new profile with a classification model

## Clustering the New Profile Data