Write a class that generates fake data.

* Write the code under the docstrings (do NOT remove the docstrings)
* Before committing, do a Restart & Run All

In [1]:
import pandas as pd
import random
from faker import Faker

class FakeDataGenerator:
    """Adds columns of fake data to a pandas data frame.

    Every ``add_*`` method writes one column of ``self.rows`` generated
    values into ``self.df`` and returns that same data frame.
    """

    def __init__(self, df=None, rows=5):
        """
        This class generates fake data
        
        If data frame is passed in it will add the fake data to it. 
        Otherwise, it will create a new data frame
        
        Parameters:
        -----------
        df: 
          DataFrame default is None
          
        rows:
          If df None, this parameter will be used to create a data frame with `rows` length
        """
        # code goes under this line. 
        # I will deduct points if any code is written above the comments!

        # A single Faker instance is shared by every add_* method.
        self.fake = Faker()

        if df is not None:
            # Keep a reference to the caller's frame (no copy), so new
            # columns show up on the caller's object as well.
            self.df = df
            self.rows = len(df)
        else:
            self.df = pd.DataFrame()
            self.rows = rows

    def _add_column(self, column_name, make_value):
        """
        fills `column_name` with one `make_value()` result per row

        Parameters:
        -----------
        column_name:
          label of the column to create or overwrite

        make_value:
          zero-argument callable producing the value for a single row

        Returns
        -------
        data frame
        """
        self.df[column_name] = [make_value() for _ in range(self.rows)]

        return self.df

    def add_random_number(self, column_name, small_value, high_value): # optional: add probabilities 
        """
        adds fake numbers to the data frame with `column_name`

        Each value is an integer drawn with `random.randint`, so both
        `small_value` and `high_value` are possible results.
        
        Returns
        -------
        data frame
        """

        return self._add_column(
            column_name, lambda: random.randint(small_value, high_value)
        )

    def add_name(self, column_name):
        """
        adds fake name to the data frame with `column_name`
        
        Returns
        -------
        data frame
        """

        return self._add_column(column_name, self.fake.first_name)

    def add_last_name(self, column_name):
        """
        adds fake last name to the data frame with `column_name`
        
        Returns
        -------
        data frame
        """

        return self._add_column(column_name, self.fake.last_name)

    def add_email(self, column_name):
        """
        adds fake email to the data frame with `column_name`
        
        Returns
        -------
        data frame
        """

        return self._add_column(column_name, self.fake.email)

    def add_phone_number(self, column_name):
        """
        adds fake phone number to the data frame with `column_name`
        
        Returns
        -------
        data frame
        """

        # Uses the shared self.fake; no per-call Faker() is needed.
        return self._add_column(column_name, self.fake.phone_number)

    def add_address(self, column_name):
        """
        adds fake address to the data frame with `column_name`
        
        Returns
        -------
        data frame
        """

        # Uses the shared self.fake; no per-call Faker() is needed.
        return self._add_column(column_name, self.fake.address)
    
    

In [2]:
import pandas as pd

# Case 1 - existing data frame
# Twenty-five consecutive product ids, 123 through 147, built in one step.
df = pd.DataFrame({'product_id': list(range(123, 148))})
df.head()

Unnamed: 0,product_id
0,123
1,124
2,125
3,126
4,127


In [3]:
# Case 1 (continued): wrap the existing 25-row `df`. The generator keeps a
# reference to `df` itself, so the new columns are added to `df` in place.
fakeGen = FakeDataGenerator(df) # create the instance from the existing df
fakeGen.add_name('name')
fakeGen.add_last_name('lastname')
fakeGen.add_address('shipping_address')
fakeGen.add_address('billing_address')
# Every add_* method returns the data frame, so this last call's result is
# what the cell evaluates to.
fakeGen.add_phone_number('customer_phone')

Unnamed: 0,product_id,name,lastname,shipping_address,billing_address,customer_phone
0,123,Chad,Pierce,"8505 Pacheco Streets Suite 204\nJohnsontown, A...","06400 Irwin Pines\nNorth Jenniferland, MT 11992",001-587-350-9204x065
1,124,Kyle,Taylor,"3514 Ross Ports\nEvelynchester, WV 18669","67216 Keller Freeway\nGrossshire, SD 35848",(210)285-1652x35924
2,125,David,Vaughn,"415 Tristan Oval Suite 226\nHannahfurt, GA 84130","8846 Johnson Dale Suite 625\nNew Michaelland, ...",+1-761-879-9237x364
3,126,Natalie,Lee,"6294 Romero Walk Suite 865\nEast Lindastad, WY...","44965 Kyle Vista Apt. 217\nNew Teresa, MS 31404",(000)023-9407x722
4,127,Andrew,Mason,"31107 Rose Plain Suite 731\nNew Jamesberg, ND ...","966 Park Common Suite 231\nEast Stacy, GA 32074",001-655-306-4936x7514
5,128,Katherine,Payne,"10603 Smith Ways\nJasonberg, NC 82150",Unit 5039 Box 3761\nDPO AA 81489,001-872-005-4243
6,129,Jacob,Bruce,"714 Jennifer River\nDanielview, MO 30203","53771 Torres Pass\nEast Troyfort, MO 55880",323.984.7736
7,130,Kenneth,Roberts,86974 Brown Stravenue Suite 547\nLake Stephani...,"0625 John Shore Apt. 076\nWest Henry, PA 32365",+1-435-459-3634x61021
8,131,Ashley,Rice,"7332 Lewis Locks Apt. 267\nCharlesbury, NE 23023","3120 Edward Trail Suite 206\nLake Brentburgh, ...",456.327.9865x602
9,132,George,Wolf,"53173 Emily Canyon\nEast Alicialand, PA 92096","3658 Burch Shore\nChristopherfort, KS 47192",487.070.9638


In [4]:
# Case 2 - no data frame: the generator starts from an empty frame and
# fills each new column with the default `rows=5` values.

fakeGen = FakeDataGenerator() # no df passed in, so a new one is created
fakeGen.add_address('shipping_address')
fakeGen.add_address('billing_address')
fakeGen.add_phone_number('customer_phone')
# Random integers between 10 and 100 (random.randint includes both ends);
# the returned data frame is what the cell evaluates to.
fakeGen.add_random_number('sale_count', 10, 100)

Unnamed: 0,shipping_address,billing_address,customer_phone,sale_count
0,"1543 Jerry Greens Apt. 012\nChristensenport, I...","138 Davidson Fork\nCookborough, NC 20471",001-326-189-3490x5054,31
1,"PSC 6320, Box 3264\nAPO AP 82614","16585 Clark Center Apt. 268\nColinside, NE 71613",252.942.0832x3202,77
2,"146 Anna Ferry\nNorth Emily, WV 16288","5315 Fisher Points Apt. 510\nNew Cynthia, OK 4...",001-386-919-3352x94396,52
3,"19402 Michael Ridge Suite 252\nEast Anna, PA 9...","42529 Brown Rapids Suite 855\nJoanborough, WI ...",001-113-781-6551x47030,59
4,"4307 Sean Mill Suite 202\nNorth Ericaview, WI ...","3909 Julia Crescent\nAlexandershire, NM 48838",001-342-316-7535,84
