# Profile Maker/Formatter/Configuration

In [68]:
import _pickle as pickle
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import MinMaxScaler

In [100]:
class CreateProfile:
    """
    Builds a single new profile (a one-row DataFrame) that lines up with an
    existing, larger dataset of profiles.

    Attributes:
        dataset: the larger DataFrame of existing profiles.
        profile: the one-row DataFrame for the new profile. It starts with no
                 columns unless a non-empty profile was supplied.
    """

    def __init__(self, dataset=None, profile=None):
        """
        dataset: DataFrame of existing profiles (defaults to an empty DataFrame)
        profile: an already filled-in profile DataFrame; when omitted or empty,
                 a blank profile with the next index label is created instead
        """

        # None sentinels are used instead of DataFrame() defaults so that no
        # mutable default object is shared between instances
        self.dataset = pd.DataFrame() if dataset is None else dataset

        if profile is None or profile.empty:

            # The new profile gets the index label following the dataset's last one;
            # an empty dataset has no last label, so numbering starts at 0
            if len(self.dataset.index) > 0:
                next_index = self.dataset.index[-1] + 1
            else:
                next_index = 0

            # Initializing a new DF for the new profile with a new index or user number
            self.profile = pd.DataFrame(index=[next_index])

        else:
            # Using the given profile
            self.profile = profile


    def enter_info(self, random_info=True):
        """
        Enter information for the new profile either by user text input
        Or through random information from the larger dataset

        Returns the filled-in profile DataFrame, or a message string if the
        profile already contained data (in which case nothing is changed).
        """

        # Guard clause: never overwrite a profile that already holds data
        if not self.profile.empty:
            return "Data already contained in the profile"

        # Iterating through the columns of the larger dataset in order to add new data to the profile
        for column in self.dataset.columns:

            if random_info:
                # Each column is sampled independently, so the new profile is a
                # mix of values taken from (possibly) different existing rows
                self.profile[column] = self.dataset[column].sample(1).to_numpy()

            else:
                # Will need type checking: input() always yields a string,
                # even for columns that are numeric in the dataset
                self.profile[column] = input(f"Enter info for {column}")

        return self.profile


    def add_profile_to_dataset(self):
        """
        Appends the new profile to the dataset to return a new larger dataset containing the brand new profile

        Neither self.dataset nor self.profile is modified. Returns a message
        string instead if the profile's columns do not match the dataset's.
        """

        dataset_feats = self.dataset.columns

        profile_feats = self.profile.columns

        # Compare the actual column labels. The previous check compared
        # Index.all() on both sides, which only compares two truthiness
        # reductions and so accepted profiles with unrelated columns.
        # Order is ignored because concat aligns columns by label.
        same_features = (len(dataset_feats) == len(profile_feats)
                         and set(dataset_feats) == set(profile_feats))

        if same_features:

            # DataFrame.append was removed in pandas 2.0; concat is the equivalent
            return pd.concat([self.dataset, self.profile])

        # If profile features/columns don't line up with the dataset's
        return "Profile features do not match larger dataset"


    def vect_text(self):
        """
        Given a profile DF with information included

        Replaces the text in the profile with a vectorized array of numbers.

        Returns a new DataFrame in which the 'Bios' column is replaced by one
        count column per word; self.profile itself is left unchanged.
        """

        # Finding all the text in the profile
        text = self.profile['Bios']

        # Instantiating the vectorizer
        vectorizer = CountVectorizer()

        # Fitting the vectorizer
        vect_words = vectorizer.fit_transform(text)

        # get_feature_names() was removed in scikit-learn 1.2; prefer the
        # replacement and fall back only on versions that predate it
        if hasattr(vectorizer, "get_feature_names_out"):
            feature_names = vectorizer.get_feature_names_out()
        else:
            feature_names = vectorizer.get_feature_names()

        # Converting the vectorized words into a DF
        vect_df = pd.DataFrame(vect_words.toarray(),
                               index=self.profile.index,
                               columns=feature_names)

        # Concatenating the vectorized DF with the profile DF
        concat = pd.concat([self.profile, vect_df], axis=1)

        # Dropping the original text from the DF
        return concat.drop('Bios', axis=1)

In [101]:
# Load the previously pickled profiles object from disk.
# NOTE(review): unpickling can execute arbitrary code embedded in the file,
# so only load pickles that come from a trusted source.
with open("Pickles/profiles.pkl",'rb') as fp:
    data = pickle.load(fp)

In [102]:
# Show the loaded object as this cell's output
data

Unnamed: 0,Bios,Movies,TV,Religion,Music,Sports,Books,Politics
0,Typical twitter fanatic. Infuriatingly humble ...,5,3,4,1,3,6,7
1,Web junkie. Analyst. Infuriatingly humble intr...,7,9,5,1,9,4,0
2,Avid web maven. Food practitioner. Gamer. Twit...,1,2,6,5,6,5,4
3,Twitteraholic. Extreme web fanatic. Food buff....,5,2,7,8,2,6,6
4,Bacon enthusiast. Falls down a lot. Freelance ...,6,6,6,4,3,6,3
...,...,...,...,...,...,...,...,...
6595,Typical pop culture nerd. Infuriatingly humble...,7,9,0,0,2,2,4
6596,Avid web junkie. Lifelong alcohol guru. Hardco...,4,3,6,3,7,7,2
6597,Music ninja. Bacon fanatic. Reader. Total comm...,1,4,0,4,9,2,5
6598,Communicator. Bacon lover. Award-winning intro...,6,2,0,3,8,9,1


In [103]:
# No profile is passed in, so CreateProfile starts a blank one indexed after the dataset's last row
new_profile = CreateProfile(dataset=data)

In [104]:
# random_info defaults to True: every column is filled with a value sampled from the dataset
new_profile.enter_info()

Unnamed: 0,Bios,Movies,TV,Religion,Music,Sports,Books,Politics
6600,Analyst. Coffee aficionado. Devoted bacon nerd...,3,9,5,1,2,3,0


In [105]:
# The combined DataFrame is only displayed here; it is not assigned, so `data` is unchanged
new_profile.add_profile_to_dataset()

Unnamed: 0,Bios,Movies,TV,Religion,Music,Sports,Books,Politics
0,Typical twitter fanatic. Infuriatingly humble ...,5,3,4,1,3,6,7
1,Web junkie. Analyst. Infuriatingly humble intr...,7,9,5,1,9,4,0
2,Avid web maven. Food practitioner. Gamer. Twit...,1,2,6,5,6,5,4
3,Twitteraholic. Extreme web fanatic. Food buff....,5,2,7,8,2,6,6
4,Bacon enthusiast. Falls down a lot. Freelance ...,6,6,6,4,3,6,3
...,...,...,...,...,...,...,...,...
6596,Avid web junkie. Lifelong alcohol guru. Hardco...,4,3,6,3,7,7,2
6597,Music ninja. Bacon fanatic. Reader. Total comm...,1,4,0,4,9,2,5
6598,Communicator. Bacon lover. Award-winning intro...,6,2,0,3,8,9,1
6599,Unapologetic tv aficionado. Devoted twitter en...,2,1,8,7,0,5,5


In [106]:
# Returns a copy of the profile with 'Bios' replaced by per-word count columns (result not stored)
new_profile.vect_text()

Unnamed: 0,Movies,TV,Religion,Music,Sports,Books,Politics,aficionado,alcohol,analyst,bacon,coffee,devoted,expert,geek,nerd,proud,twitter
6600,3,9,5,1,2,3,0,1,1,1,1,1,1,1,1,1,1,1
