# Profile Maker/Formatter/Configuration

In [1]:
# Importing Libraries
import _pickle as pickle
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Loading the pickled profiles from disk
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source
with open("Pickles/profiles.pkl", "rb") as pickle_file:
    data = pickle.load(pickle_file)

# Defining the Profile Class

In [3]:
# Creating the class object
class CreateProfile:
    """
    Creates a new profile from a DF of existing profiles and formats it.

    Attributes:
        dataset: the larger DF of existing profiles (an empty DF if none was given)
        profile: the single profile DF being created or formatted
        vectorized_text: DF returned by vect_text(); a placeholder string until then
        scaled_profile: DF returned by scale_profile(); a placeholder string until then
        formatted_profile: DF returned by format_profile(); a placeholder string until then
        combined_df: DF returned by add_profile_to_dataset(); a placeholder string until then
    """

    def __init__(self, dataset=None, profile=None):
        """
        Using a given DF of profiles, creates a new profile based on information from that given DF

        If profile already given, allows formatting of that profile

        Args:
            dataset: DF of existing profiles; anything that is not a DF is replaced by an empty DF
            profile: DF holding an existing profile; anything that is not a DF starts a new, empty profile
        """

        # Falling back to an empty DF when no usable dataset is supplied
        self.dataset = dataset if isinstance(dataset, pd.DataFrame) else pd.DataFrame()

        # Handling the profile
        if isinstance(profile, pd.DataFrame):
            # Using the given profile
            self.profile = profile

        else:
            # Giving the new profile the next index/user number after the dataset's last one
            try:
                new_index = self.dataset.index[-1] + 1

            # If starting from an empty DF (IndexError) or a non-numeric index (TypeError)
            except (IndexError, TypeError):
                new_index = 0

            self.profile = pd.DataFrame(index=[new_index])

        # Vectorized version of the profile, will be N/A until the vect_text() method is used
        self.vectorized_text = "Use vect_text() to return a vectorized DF"

        # Scaled version of the profile, will be N/A until the scale_profile() method is used
        self.scaled_profile = "Use scale_profile() to return a scaled DF"

        # Formatted version of the profile, which contains the scaled and vectorized data of the profile
        self.formatted_profile = "Use format_profile() to return a both scaled and vectorized DF"

        # A combined DF containing both the original data and the new profile, will be N/A until add_profile_to_dataset() is used
        self.combined_df = "Use add_profile_to_dataset() to return the combined DF"

    def enter_info(self, random_info=True):
        """
        Enter information for the new profile either by user text input
        Or through random information from the larger dataset

        Args:
            random_info: if True, each column gets one value sampled from the same
                column of the dataset; if False, each value is read with input()

        Returns:
            The filled profile DF, or an explanatory string if the profile already held data
        """

        # If there is already data in the profile
        if not self.profile.empty:
            return "Data already contained in the profile"

        # Iterating through the columns of the larger DF in order to add new data to the profile
        for col in self.dataset.columns:

            if random_info:
                # Each column is sampled independently, so the values may come from different rows
                self.profile[col] = self.dataset[col].sample(1).to_numpy()

            else:
                # Will need type checking: input() always returns a string
                self.profile[col] = input(f"Enter info for {col}")

        return self.profile

    def add_profile_to_dataset(self):
        """
        Appends the new profile to the dataset to return a new larger dataset containing the brand new profile

        Only will use the original format of the DF, no vectorized or scaled DFs

        Returns:
            The combined DF, or an explanatory string if the profile's columns
            do not match the dataset's columns
        """

        dataset_feats = list(self.dataset.columns)
        profile_feats = list(self.profile.columns)

        # Comparing the actual column labels; order is irrelevant because concat aligns by name
        same_feats = (len(dataset_feats) == len(profile_feats)
                      and set(dataset_feats) == set(profile_feats))

        # If profile features/columns don't line up with the dataset's
        if not same_feats:
            return "Profile features do not match larger dataset"

        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0
        self.combined_df = pd.concat([self.dataset, self.profile])

        return self.combined_df

    def vect_text(self):
        """
        Given new profile data

        Replaces the text in the profile with a vectorized array of numbers.

        Returns:
            DF of token counts for the profile's 'Bios' column, indexed like the profile
        """

        # Finding all the text in the profile
        text = self.profile['Bios']

        # Instantiating the vectorizer
        vectorizer = CountVectorizer()

        # Fitting and transforming the text
        vect_words = vectorizer.fit_transform(text)

        # get_feature_names() was removed in scikit-learn 1.2; older releases lack the _out variant
        try:
            feature_names = vectorizer.get_feature_names_out()
        except AttributeError:
            feature_names = vectorizer.get_feature_names()

        # Converting the vectorized words into a DF
        self.vectorized_text = pd.DataFrame(vect_words.toarray(),
                                            index=self.profile.index,
                                            columns=feature_names)

        return self.vectorized_text

    def scale_profile(self, exclude=('Bios',)):
        """
        Given a profile with information included

        Scales necessary features from the profile DF from 0 to 1 in relation the overall larger DF

        Does not scale features in the exclude list

        Args:
            exclude: collection of column names to leave out of the scaled DF

        Returns:
            DF of the scaled columns, indexed like the profile
        """

        # Instantiating the scaler we will use
        scaler = MinMaxScaler()

        # Creating a new DF for the scaled profile
        self.scaled_profile = pd.DataFrame(index=self.profile.index)

        # Iterating only through the necessary columns
        for col in self.dataset.columns:

            # Skipping columns we don't want to scale (i.e. text columns)
            if col in exclude:
                continue

            # Fitting the scaler to the larger DF
            scaler.fit(self.dataset[[col]])

            # Transforming the values based on the larger DF; flattened to one value per row
            self.scaled_profile[col] = scaler.transform(self.profile[[col]]).ravel()

        # Returning the final scaled profile
        return self.scaled_profile

    def format_profile(self):
        """
        Uses both scaling and vectorizing to format the profile DF

        Returns:
            DF with the scaled columns followed by the vectorized text columns
        """

        # Reusing the scaled DF if scale_profile() already produced one, otherwise running it now
        scaled = self.scaled_profile
        if not isinstance(scaled, pd.DataFrame):
            scaled = self.scale_profile()

        # Same for the vectorized text
        vectorized = self.vectorized_text
        if not isinstance(vectorized, pd.DataFrame):
            vectorized = self.vect_text()

        self.formatted_profile = pd.concat([scaled, vectorized], axis=1)

        # Return the formatted profile DF
        return self.formatted_profile

# Testing our Class Object

In [4]:
# Instantiating the class object with the loaded profiles as the dataset
# No profile is passed, so a new profile with no columns yet is created
new_profile = CreateProfile(dataset=data)

## Class Instance Methods

In [5]:
# Filling the new profile with values sampled at random from the dataset (random_info defaults to True)
new_profile.enter_info()

Unnamed: 0,Bios,Movies,TV,Religion,Music,Sports,Books,Politics
6600,Freelance troublemaker. Introvert. Social medi...,4,4,5,0,8,5,6


In [6]:
# Appending the new profile to the dataset and displaying the combined DF
new_profile.add_profile_to_dataset()

Unnamed: 0,Bios,Movies,TV,Religion,Music,Sports,Books,Politics
0,Typical twitter fanatic. Infuriatingly humble ...,5,3,4,1,3,6,7
1,Web junkie. Analyst. Infuriatingly humble intr...,7,9,5,1,9,4,0
2,Avid web maven. Food practitioner. Gamer. Twit...,1,2,6,5,6,5,4
3,Twitteraholic. Extreme web fanatic. Food buff....,5,2,7,8,2,6,6
4,Bacon enthusiast. Falls down a lot. Freelance ...,6,6,6,4,3,6,3
...,...,...,...,...,...,...,...,...
6596,Avid web junkie. Lifelong alcohol guru. Hardco...,4,3,6,3,7,7,2
6597,Music ninja. Bacon fanatic. Reader. Total comm...,1,4,0,4,9,2,5
6598,Communicator. Bacon lover. Award-winning intro...,6,2,0,3,8,9,1
6599,Unapologetic tv aficionado. Devoted twitter en...,2,1,8,7,0,5,5


In [7]:
# Vectorizing the text of the profile's 'Bios' column with CountVectorizer
new_profile.vect_text()

Unnamed: 0,advocate,freelance,incurable,introvert,media,practitioner,problem,social,solver,troublemaker,twitter
6600,1,1,1,1,1,1,1,1,1,1,1


In [8]:
# Scaling every column except 'Bios' with a MinMaxScaler fitted on the dataset
new_profile.scale_profile()

Unnamed: 0,Movies,TV,Religion,Music,Sports,Books,Politics
6600,0.444444,0.444444,0.555556,0.0,0.888889,0.555556,0.666667


In [9]:
# Joining the scaled and vectorized DFs column-wise into one formatted profile
new_profile.format_profile()

Unnamed: 0,Movies,TV,Religion,Music,Sports,Books,Politics,advocate,freelance,incurable,introvert,media,practitioner,problem,social,solver,troublemaker,twitter
6600,0.444444,0.444444,0.555556,0.0,0.888889,0.555556,0.666667,1,1,1,1,1,1,1,1,1,1,1


## Class Attributes

In [10]:
# Displaying the attribute that vect_text() assigns
new_profile.vectorized_text

Unnamed: 0,advocate,freelance,incurable,introvert,media,practitioner,problem,social,solver,troublemaker,twitter
6600,1,1,1,1,1,1,1,1,1,1,1


In [11]:
# Displaying the attribute that scale_profile() assigns
new_profile.scaled_profile

Unnamed: 0,Movies,TV,Religion,Music,Sports,Books,Politics
6600,0.444444,0.444444,0.555556,0.0,0.888889,0.555556,0.666667


In [12]:
# Displaying the attribute that format_profile() assigns
new_profile.formatted_profile

Unnamed: 0,Movies,TV,Religion,Music,Sports,Books,Politics,advocate,freelance,incurable,introvert,media,practitioner,problem,social,solver,troublemaker,twitter
6600,0.444444,0.444444,0.555556,0.0,0.888889,0.555556,0.666667,1,1,1,1,1,1,1,1,1,1,1


In [13]:
# Displaying the attribute that add_profile_to_dataset() assigns
new_profile.combined_df

Unnamed: 0,Bios,Movies,TV,Religion,Music,Sports,Books,Politics
0,Typical twitter fanatic. Infuriatingly humble ...,5,3,4,1,3,6,7
1,Web junkie. Analyst. Infuriatingly humble intr...,7,9,5,1,9,4,0
2,Avid web maven. Food practitioner. Gamer. Twit...,1,2,6,5,6,5,4
3,Twitteraholic. Extreme web fanatic. Food buff....,5,2,7,8,2,6,6
4,Bacon enthusiast. Falls down a lot. Freelance ...,6,6,6,4,3,6,3
...,...,...,...,...,...,...,...,...
6596,Avid web junkie. Lifelong alcohol guru. Hardco...,4,3,6,3,7,7,2
6597,Music ninja. Bacon fanatic. Reader. Total comm...,1,4,0,4,9,2,5
6598,Communicator. Bacon lover. Award-winning intro...,6,2,0,3,8,9,1
6599,Unapologetic tv aficionado. Devoted twitter en...,2,1,8,7,0,5,5
