Write a class that generates fake data.

* Write the code under the py docs (do NOT remove the py docs)
* Before committing, do a Restart & Run All

In [8]:
import pandas as pd
import random
from faker import Faker

class FakeDataGenerator:
    """Adds columns of fake data to a pandas data frame, one column per call."""

    def __init__(self, df = None, rows=5):
        """
        This class generates fake data
        
        If data frame is passed in it will add the fake data to it. 
        Otherwise, it will create a new data frame
        
        Parameters:
        -----------
        df: 
          DataFrame default is None. When given, it is used as-is (columns
          are added to this same object) and its row count decides how many
          fake values each column gets.
          
        rows:
          If df is None, this parameter will be used to create a data frame
          with `rows` length. It is ignored when df is passed in.
        """
        # code goes under this line. 
        # I will deduct points if any code is written above the comments!

        # One shared Faker instance serves every add_* method.
        self.fake = Faker()

        if df is not None:
            self.df = df
            self.rows = self.df.shape[0]
        else:
            self.df = pd.DataFrame()
            self.rows = rows

    def _add_column(self, column_name, make_value):
        """
        fills `column_name` with one `make_value()` result per row
        
        Parameters:
        -----------
        column_name:
          name of the column to create (or overwrite) in the data frame
          
        make_value:
          zero-argument callable, called `self.rows` times
        
        Returns
        -------
        data frame
        """
        self.df[column_name] = [make_value() for _ in range(self.rows)]
        return self.df

    def add_random_number(self, column_name, small_value, high_value): # optional: add probabilities 
        """
        adds fake numbers to the data frame with `column_name`
        
        Each value is a random integer between `small_value` and
        `high_value`, both ends included.
        
        Returns
        -------
        data frame
        """
        return self._add_column(
            column_name, lambda: random.randint(small_value, high_value)
        )

    def add_name(self, column_name):
        """
        adds fake name to the data frame with `column_name`
        
        Returns
        -------
        data frame
        """
        return self._add_column(column_name, self.fake.first_name)

    def add_last_name(self, column_name):
        """
        adds fake last name to the data frame with `column_name`
        
        Returns
        -------
        data frame
        """
        return self._add_column(column_name, self.fake.last_name)

    def add_email(self, column_name):
        """
        adds fake email to the data frame with `column_name`
        
        Returns
        -------
        data frame
        """
        return self._add_column(column_name, self.fake.email)

    def add_phone_number(self, column_name):
        """
        adds fake phone number to the data frame with `column_name`
        
        Returns
        -------
        data frame
        """
        # Uses the instance's Faker; no per-call import or new Faker() needed.
        return self._add_column(column_name, self.fake.phone_number)

    def add_address(self, column_name):
        """
        adds fake address to the data frame with `column_name`
        
        Returns
        -------
        data frame
        """
        # Uses the instance's Faker; no per-call import or new Faker() needed.
        return self._add_column(column_name, self.fake.address)
    
    

In [9]:
import pandas as pd

# Case 1 - an existing data frame: 25 consecutive product ids starting at 123
df = pd.DataFrame({'product_id': list(range(123, 123 + 25))})
df.head()

Unnamed: 0,product_id
0,123
1,124
2,125
3,126
4,127


In [10]:
# Build the generator around the existing df, so the fake columns land on it
fakeGen = FakeDataGenerator(df)
fakeGen.add_name('name')
fakeGen.add_last_name('lastname')
for address_column in ('shipping_address', 'billing_address'):
    fakeGen.add_address(address_column)
# Left as the cell's final expression so the notebook renders the returned frame
fakeGen.add_phone_number('customer_phone')

Unnamed: 0,product_id,name,lastname,shipping_address,billing_address,customer_phone
0,123,Nicole,Flowers,"092 Mccarthy Skyway\nRileyville, SD 21111","352 Holt Row Suite 928\nMillertown, AL 26500",923-648-2214
1,124,Anne,Kennedy,USS Santos\nFPO AP 22086,"615 Angela Radial Apt. 519\nBradport, VT 09506",001-011-981-7763x062
2,125,Elizabeth,Diaz,"05900 Marcus Road Apt. 990\nLake Maryburgh, KS...","17064 Flynn Road Apt. 609\nJasonberg, MT 67566",+1-931-904-0934x790
3,126,Cheryl,Day,USS Fitzpatrick\nFPO AP 02754,"424 Hardin Cove Suite 872\nBruceborough, DC 70169",684.770.0406
4,127,Kayla,Combs,"0015 James Manors\nWrighthaven, MO 93196","822 Heather Village\nEast Robert, TX 99924",001-043-224-1140
5,128,Sherri,Vance,"4796 Jessica Hill\nShannontown, UT 02641","10641 Humphrey Divide Apt. 178\nYoungbury, CT ...",+1-066-409-9060x1180
6,129,Shelby,Simmons,"0660 Johnny Cliff Apt. 044\nUnderwoodborough, ...","55066 Singh Bypass\nNorth Gregoryburgh, WA 55702",178-371-9682
7,130,David,Brewer,"9953 Nicole Via\nMaciasbury, AR 09528","8635 Vaughn Passage\nPort Amanda, KY 23446",(367)924-2907x7093
8,131,Glenn,Cohen,"236 Kenneth Divide Apt. 765\nJuarezview, OH 04740","901 Ryan Rue\nMalloryhaven, OR 30703",863.263.4166
9,132,Taylor,Murphy,"69430 Gregory Meadow\nLake Craigside, VA 54855","16053 Nicholas Rue Suite 521\nNew Scott, NC 76392",+1-897-875-0827x147


In [11]:
# Case 2 - no data frame is passed, so the generator creates its own

fakeGen = FakeDataGenerator()
for address_column in ('shipping_address', 'billing_address'):
    fakeGen.add_address(address_column)
fakeGen.add_phone_number('customer_phone')
# Left as the cell's final expression so the notebook renders the returned frame
fakeGen.add_random_number('sale_count', 10, 100)

Unnamed: 0,shipping_address,billing_address,customer_phone,sale_count
0,"28104 Laura Freeway Apt. 251\nPort Samueltown,...","806 James Islands\nEast Rosetown, MI 51269",+1-311-425-1662x36973,92
1,"2421 Robin Flats\nWest Valerietown, VT 29943","683 John Square\nMadisonville, AZ 98304",884-632-3967,16
2,"35739 Green Lakes\nPruittview, MI 21972","150 Michelle Lodge Suite 926\nAdamsfort, NE 36769",+1-979-101-7601x56269,29
3,"243 Holloway Corner\nMelissafurt, NV 56481","141 Joshua Springs Apt. 878\nLake Daniel, IN 2...",001-556-225-8984x5697,99
4,"363 Rodriguez Port Suite 935\nFrancisbury, WY ...","47274 Giles Glen\nWest Josefort, VT 74768",031-632-2243,56
