In [1]:
import pandas as pd
import random
from faker import Faker

class FakeDataGenerator:
    """
    Adds columns of fake data (from Faker and `random`) to a pandas data frame.

    Every `add_*` method writes one column into `self.df` and returns that
    same data frame, so a notebook cell that ends with an `add_*` call
    displays the updated frame.
    """

    def __init__(self, df = None, rows=5, seed=None):
        """
        This class generates fake data

        If a data frame is passed in, the fake data is added to it. The frame
        is used as-is (not copied), so the caller's object gains the columns.
        Otherwise, a new empty data frame is created.

        Parameters:
        -----------
        df:
          DataFrame default is None

        rows:
          If df is None, this parameter will be used to create a data frame with `rows` length

        seed:
          Optional seed, default is None. When given, it seeds this object's
          Faker instance and a private random number generator so repeated
          runs produce the same values. When None, nothing is seeded and
          numbers are drawn from the shared `random` module state.
        """
        # code goes under this line. 
        # I will deduct points if any code is written above the comments!

        self.fake = Faker()

        if seed is None:
            # Keep using the module-level generator so an outer random.seed()
            # call still controls add_random_number.
            self._rng = random
        else:
            self.fake.seed_instance(seed)
            self._rng = random.Random(seed)

        if df is not None:
            self.df = df
            self.rows = len(df)
        else:
            self.df = pd.DataFrame()
            self.rows = rows


    def _row_count(self):
        """Returns how many values a new column needs."""
        # Once the frame has rows, its own length is authoritative; this keeps
        # working if `self.df` was replaced or grown after construction.
        # `self.rows` only matters while the frame is still empty.
        return len(self.df) or self.rows


    def _add_column(self, column_name, make_value):
        """
        Fills `column_name` with one `make_value()` result per row.

        Returns
        -------
        data frame
        """
        self.df[column_name] = [make_value() for _ in range(self._row_count())]

        return self.df


    def add_random_number(self, column_name, small_value, high_value): # optional: add probabilities 
        """
        adds fake numbers to the data frame with `column_name`

        Each value is an integer drawn with `randint`, so both `small_value`
        and `high_value` are possible results. `randint` raises ValueError
        when `small_value` is greater than `high_value`.

        Returns
        -------
        data frame
        """
        return self._add_column(
            column_name,
            lambda: self._rng.randint(small_value, high_value),
        )


    def add_name(self, column_name):
        """
        adds fake name to the data frame with `column_name`

        Values come from Faker's `first_name()`.

        Returns
        -------
        data frame
        """
        return self._add_column(column_name, self.fake.first_name)


    def add_last_name(self, column_name):
        """
        adds fake last name to the data frame with `column_name`

        Returns
        -------
        data frame
        """
        return self._add_column(column_name, self.fake.last_name)


    def add_email(self, column_name):
        """
        adds fake email to the data frame with `column_name`

        Returns
        -------
        data frame
        """
        return self._add_column(column_name, self.fake.email)


    def add_phone_number(self, column_name):
        """
        adds fake phone number to the data frame with `column_name`

        Returns
        -------
        data frame
        """
        return self._add_column(column_name, self.fake.phone_number)


    def add_address(self, column_name):
        """
        adds fake address to the data frame with `column_name`

        Returns
        -------
        data frame
        """
        return self._add_column(column_name, self.fake.address)


In [2]:
import pandas as pd

# Case 1 - start from a data frame that already exists (product ids 123..147)
df = pd.DataFrame({'product_id': list(range(123, 148))})
df.head()

Unnamed: 0,product_id
0,123
1,124
2,125
3,126
4,127


In [3]:
# Case 1 (continued): wrap the existing df; each add_* call writes a column into it.
fakeGen = FakeDataGenerator(df)
fakeGen.add_name('name')
fakeGen.add_last_name('lastname')
for address_column in ('shipping_address', 'billing_address'):
    fakeGen.add_address(address_column)
# The last call returns the data frame, which the notebook displays.
fakeGen.add_phone_number('customer_phone')

Unnamed: 0,product_id,name,lastname,shipping_address,billing_address,customer_phone
0,123,Jessica,Ballard,"465 Katherine Mount\nJeffreyborough, LA 26557","783 Browning Hollow Suite 818\nWheelerborough,...",001-082-116-3792x989
1,124,Christopher,Jensen,"861 Victoria Valleys Suite 742\nHubbardmouth, ...","84409 James Walk\nNew Brandon, MS 70442",447-996-0240x025
2,125,Michael,Hancock,"938 Contreras Stravenue\nMillerstad, LA 16341","30083 Cameron Manors Suite 431\nKimberlymouth,...",(194)885-0165x025
3,126,Chelsea,Kane,"531 Lewis Mews\nChristopherview, MI 57871","4173 Mikayla Court Suite 358\nRichardton, UT 3...",385-554-0091x020
4,127,Samantha,Ruiz,8312 Katrina Branch Suite 619\nLake Sydneyburg...,Unit 3621 Box 8719\nDPO AA 24221,(589)667-9816
5,128,Allison,Hurst,"72537 James Meadows\nPort Theresa, CA 87420",22620 Samantha Lane Apt. 113\nNorth Rachelvill...,338.618.7599
6,129,Tiffany,Carpenter,Unit 6954 Box 8878\nDPO AP 10484,"759 Lindsey Port\nEricfurt, UT 24147",050.443.8848x5746
7,130,Jodi,Anderson,"3245 Joseph Groves Suite 216\nNorth Rachel, VA...","74176 Susan Trail\nLake Lesliestad, HI 28233",970.159.0451x17662
8,131,Joseph,Li,"48577 Kennedy Glen\nDixonmouth, NE 64434",Unit 4878 Box 0686\nDPO AP 12638,065-352-4895x5895
9,132,Kimberly,Garrett,"527 Michael Gardens\nEast Brian, ND 68324","01171 Rodriguez Courts Apt. 528\nEricksontown,...",001-250-876-8269x10025


In [4]:
# Case 2 - no dataframe

# Without a df the generator starts from an empty frame and uses its default row count.
fakeGen = FakeDataGenerator()
for address_column in ('shipping_address', 'billing_address'):
    fakeGen.add_address(address_column)
fakeGen.add_phone_number('customer_phone')
# The last call returns the data frame, which the notebook displays.
fakeGen.add_random_number('sale_count', small_value=10, high_value=100)

Unnamed: 0,shipping_address,billing_address,customer_phone,sale_count
0,"7399 Sarah Flat\nButlerbury, AK 54735",Unit 0644 Box 2927\nDPO AE 54247,(635)419-6320x1061,94
1,"11683 Michael Valley\nEugeneburgh, TN 90366",USS Porter\nFPO AE 56119,001-785-853-6788x557,47
2,USCGC Moran\nFPO AA 71205,"705 Michael Key\nRobbinsmouth, MI 14201",001-182-489-0982x78799,27
3,"412 Sanchez Mall\nNew Nicholasland, CT 23395","0203 Garcia View Suite 109\nSouth Lauren, AL 1...",+1-521-486-1636x0619,32
4,"1308 Vaughan Path Suite 664\nJoshuaside, ID 16305","163 Bond Alley\nBellton, MA 47355",8068857157,31
