In [1]:
!pip install faker



Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.
To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.
You should consider upgrading via the 'c:\users\ktgan\appdata\local\programs\python\python36\python.exe -m pip install --upgrade pip' command.


In [2]:
import sys
from collections import namedtuple, Counter
from datetime import date
from random import choice, randint, uniform
from faker import Faker

In [3]:
def getfaker_obj():
    '''
    Faker is a python library to generate fake data
    Function which returns the faker object
    Seed helps to generate the same data in multiple runs of the function
    '''
    Faker.seed(47)
    fake = Faker()
    return fake

In [4]:
def getfaker_profile(fakerobj):
    '''
    Function to generate fake profile data
    Profile contains information like name, gender, job, ssn, current location, birthdate etc.
    Since we are interested only in certain fields, create a namedtuple to hold that information
    Interested fields for inspection are: blood_group, current_location age
    '''
    profile = fakerobj.profile()

    # Faker profile contains birthdate and from which age is calculated
    today = date.today()
    age = (today - profile['birthdate']).days

    Profile = namedtuple('Profile', 'blood_group latitude longitude birthdate age')
    # Returning the relevant profile information in a named tuple
    return Profile(profile['blood_group'], float(profile['current_location'][0]), \
                    float(profile['current_location'][1]), profile['birthdate'], age)

In [5]:
def generate_profiles(num_profiles = 1, profiles = None):
    '''
    Function to generate the fake profiles
    num_profiles denote the number of profiles requried to generate
    Default value is '1' and can be changed by the caller
    '''
    if profiles is not None:
        profiles_list = profiles
    else:
        profiles_list = []
        fake = getfaker_obj()
        for _ in range(num_profiles):
            # Update the list with each profile generated
            profiles_list.append(getfaker_profile(fake))
    return profiles_list

In [6]:
def getfaker_profile_dict(fakerobj):
    '''
    Function to generate fake profile data
    Profile contains information like name, gender, job, ssn, current location, birthdate etc.
    Interested fields for inspection are: blood_group, current_location age
    '''
    profile = fakerobj.profile()

    today = date.today()
    age = (today - profile['birthdate']).days

    profile_dict = dict.fromkeys(['blood_group', 'latitude', 'longitude' 'birthdate', 'age'], 0)

    profile_dict['blood_group'] = profile['blood_group']
    profile_dict['latitude'] = profile['current_location'][0]
    profile_dict['longitude'] = profile['current_location'][1]
    profile_dict['birthdate'] = profile['birthdate']
    profile_dict['age'] = age
    return profile_dict

In [7]:
def generate_profiles_dict(num_profiles = 1, profiles = None):
    '''
    Function to generate the fake profiles
    num_profiles denote the number of profiles requried to generate
    Default value is '1' and can be changed by the caller

    if profiles is sent by the caller the same information is used to populate the dictionary
    else new profiles will be created in dict fashion
    '''
    profiles_dict = {}
    if profiles is not None:
        for index, profile in enumerate(profiles):
            profiles_dict[index] = dict(profile._asdict())
    else:
        fake = getfaker_obj()
        for index in range(num_profiles):
            # Update the list with each profile generated
            profiles_dict[index] = getfaker_profile(fake)
    return profiles_dict

In [8]:
def calc_time_nt_largest_blood_group(profiles):
    '''
    Calculates the largest blood group from the list of profiles
    Extracts the blood_group to a list and use Counter to find the largest available blood group
    '''
    blood_group_list = []  

    for profile in profiles:
        # profile is a namedtuple and fields can be accessed using . (dot) operator
        blood_group_list.append(profile.blood_group)

    large_blood_group = Counter(blood_group_list).most_common(1)
    return large_blood_group

In [9]:
def calc_time_dict_largest_blood_group(profiles):
    '''
    Calculates the largest blood group from the list of profiles
    Extracts the blood_group to a list and use Counter to find the largest available blood group
    '''
    blood_group_list = []  
    #profiles is a dictionary and iterated among its values
    for profile in profiles.values():
        blood_group_list.append(profile['blood_group'])

    large_blood_group = Counter(blood_group_list).most_common(1)
    return large_blood_group

In [10]:
def calc_time_nt_mean_current_location(profiles):
    '''
    Calculates the mean current location from the list of profiles
    Extracts the lattitude and longitude to a list and computes average
    '''
    lat_list = []
    long_list = []

    for profile in profiles:
        lat_list.append(float(profile.latitude))
        long_list.append(float(profile.longitude))

    mean_location = [sum(lat_list)/len(profiles), sum(long_list)/len(profiles)]
    return mean_location

In [11]:
def calc_time_dict_mean_current_location(profiles):
    '''
    Calculates the mean current location from the list of profiles
    Extracts the lattitude and longitude to a list and computes average
    '''
    lat_list = []
    long_list = []

    for profile in profiles.values():
        lat_list.append(float(profile['latitude']))
        long_list.append(float(profile['longitude']))

    mean_location = [sum(lat_list)/len(profiles), sum(long_list)/len(profiles)]
    return mean_location


In [12]:
def calc_time_nt_oldest_average_age(profiles):
    '''
    Calculates the oldest age and average age from the list of profiles
    Extracts the age information to a list and computes maximum value and average
    '''
    age_list = []

    for profile in profiles:
        age_list.append(profile.age)

    oldest_age = max(age_list)
    average_age = (sum(age_list)/len(profiles))
    return oldest_age, average_age

In [13]:
def calc_time_dict_oldest_average_age(profiles):
    '''
    Calculates the oldest age and average age from the list of profiles
    Extracts the age information to a list and computes maximum value and average
    '''
    age_list = []

    for profile in profiles.values():
        age_list.append(profile['age'])

    oldest_age = max(age_list)
    average_age = (sum(age_list)/len(profiles))
    return oldest_age, average_age

In [14]:
num_profiles = 10000

In [15]:
profiles = generate_profiles(num_profiles)
len(profiles), type(profiles[0])

(10000, __main__.Profile)

In [16]:
profiles_dict = generate_profiles_dict(num_profiles, profiles)
len(profiles_dict), type(profiles_dict)

(10000, dict)

In [17]:
assert len(profiles) == num_profiles, 'Mismatch in length of profiles'
assert('g' not in [profile.blood_group for profile in profiles]), 'Hmm! \'g\' seems to be a rare blood-group, never heard of it!'
assert(0 not in [profile.latitude for profile in profiles]), 'No zer-coordinates for location!'
assert(0 not in [profile.longitude for profile in profiles]), 'No zer-coordinates for location!'

In [18]:
result_bt_nt = calc_time_nt_largest_blood_group(profiles)
result_bt_dict = calc_time_dict_largest_blood_group(profiles_dict)
result_bt_nt, result_bt_dict

([('B+', 1310)], [('B+', 1310)])

In [19]:
#Comparing results for largest blood type
assert(result_bt_nt == result_bt_dict), 'Largest Blood Type results are not matching'

In [20]:
result_loc_nt = calc_time_nt_mean_current_location(profiles)
result_loc_dict = calc_time_dict_mean_current_location(profiles_dict)
result_loc_nt, result_loc_dict

([0.15718491810000088, 1.415300867600005],
 [0.15718491810000088, 1.415300867600005])

In [21]:
#Comparing results for mean current location
assert(result_loc_nt == result_loc_dict), 'Mean Current Location results are not matching'

In [22]:
oldest_age_nt, average_age_nt = calc_time_nt_oldest_average_age(profiles)
oldest_age_dict, average_age_dict = calc_time_dict_oldest_average_age(profiles_dict)
oldest_age_nt, average_age_nt, oldest_age_dict, average_age_dict

(42364, 21218.406, 42364, 21218.406)

In [23]:
assert(oldest_age_nt == oldest_age_dict), 'Oldest Age results are not matching'
assert(average_age_nt == average_age_dict), 'Average Age results are not matching'

In [24]:
def timed(repeats=1):
    '''
    Decorator factory to compute the average running time of a function
    repeats is an input argument which indicates how many times the function will be running for average time computation
    default value for repeats = 1 and can be changed by the caller
    '''
    def timer_decorator(fn):
        from time import perf_counter
        from functools import wraps
        @wraps(fn)
        def inner(*args, **kwargs):
            # Call the function for repeats times and compute the average running time for each call
            total_elapsed = 0
            for _ in range(repeats):
                start = perf_counter()
                result = fn(*args, **kwargs)
                end = perf_counter()
                total_elapsed += (end-start)
            avg_time = (total_elapsed/repeats)* int(1e9) #Nano Seconds
            print(f'{fn.__name__}() average running_time: {avg_time:.3f}msec for {repeats} repetitions')
            return round(avg_time, 3), result
        return inner
    return timer_decorator

@timed(1_000_000)
def nt_field_access(profile, check_value):
    '''
    Function to access the fields of a namedtuple
    This is to check how much time it takes to access fields in namedtuple object
    '''
    'latitude' in profile._fields
    'blood_group' in profile._fields
    'random_field' in profile._fields
    check_value in profile
    profile.age

@timed(1_000_000)
def dict_field_access(profile_dict, check_value):
    '''
    Function to access the fields of a dictionary
    This is to check how much time it takes to access fields in dictionary object
    '''
    'latitude' in profile_dict
    'blood_group' in profile_dict
    'random_key' in profile_dict
    check_value in profile_dict.values()
    profile_dict['age']

In [28]:
#T1: Accessing Fields
time_nt, _ = nt_field_access(profiles[0], profiles[0].blood_group)
time_dict, _ = dict_field_access(profiles[0]._asdict(), profiles[0].blood_group)
time_nt, time_dict

nt_field_access() average running_time: 570.745msec for 1000000 repetitions
dict_field_access() average running_time: 627.198msec for 1000000 repetitions


(570.745, 627.198)

In [29]:
assert(time_nt < time_dict), 'FieldAccess: Dictionaires are better than NamedTuples!'

In [30]:
@timed(1_000_000)
def nt_size_compare(profile):
    '''
    Function to retrieve the memory occupied by namedtuple
    '''
    return sys.getsizeof(profile)

@timed(1_000_000)
def dict_size_compare(profile_dict):
    '''
    Function to retrieve the memory occupied by namedtuple
    '''
    return sys.getsizeof(profile_dict)

In [31]:
#T2: Memory Usage
_, nt_size = nt_size_compare(profiles[0])
_, dict_size = dict_size_compare(dict(profiles[0]._asdict()))
nt_size, dict_size

nt_size_compare() average running_time: 470.471msec for 1000000 repetitions
dict_size_compare() average running_time: 462.423msec for 1000000 repetitions


(88, 240)

In [32]:
assert(nt_size < dict_size), 'Memory Usage: Dictionaires occupy less memory than NamedTuples!'

In [33]:
@timed(1_000_000)
def nt_instance_compare(profile1, profile2):
    '''
    Function to compare multiple objects of a namedtuple
    '''
    return profile1 == profile2

@timed(1_000_000)
def dict_instance_compare(profile_dict1, profile_dict2):
    '''
    Function to compare multiple objects of a namedtuple
    '''
    return profile_dict1 == profile_dict2

In [34]:
#T3: Comparing two instances
time_nt = nt_instance_compare(profiles[0], profiles[0])
time_dict = dict_instance_compare(dict(profiles[0]._asdict()), dict(profiles[0]._asdict()))

nt_instance_compare() average running_time: 251.316msec for 1000000 repetitions
dict_instance_compare() average running_time: 326.080msec for 1000000 repetitions


In [35]:
assert(time_nt < time_dict), 'Instance Compare: Dictionaires are faster than NamedTuples!'

In [36]:
@timed(1_000_000)
def nt_unpacking(profile):
    '''
    Function to unpack the values from namedtuple
    '''
    blood_group, latitude, longitude, birthdate, age = profile
    return blood_group, latitude, longitude, birthdate, age

@timed(1_000_000)
def dict_unpacking(profile_dict):
    '''
    Function to unpack the values from dictionary
    '''
    blood_group, latitude, longitude, birthdate, age = profile_dict.values()
    return blood_group, latitude, longitude, birthdate, age

In [37]:
#T4: Unpacking values
time_nt = nt_unpacking(profiles[0])
time_dict = dict_unpacking(dict(profiles[0]._asdict()))

nt_unpacking() average running_time: 379.033msec for 1000000 repetitions
dict_unpacking() average running_time: 516.297msec for 1000000 repetitions


In [38]:
assert(time_nt < time_dict), 'Unpack Values: Dictionaires are faster than NamedTuples!'

In [39]:
@timed(1_000_000)
def nt_create_new_instance(nt_class, new_values):
    '''
    Function to create new instance for the given named tuple
    '''
    profile_new = nt_class._make(new_values)
    return profile_new

@timed(1_000_000)
def dict_create_new_instance(new_values):
    '''
    Function to create new instance for the given dictionary
    '''
    profile_dict_new = dict.fromkeys(['blood_group', 'latitude', 'longitude' 'birthdate', 'age'], 0)
    profile_dict_new['blood_group'] = new_values[0]
    profile_dict_new['latitude'] = new_values[1]
    profile_dict_new['longitude'] = new_values[2]
    profile_dict_new['birthdate'] = new_values[3]
    profile_dict_new['age'] = new_values[4]
    return profile_dict_new

In [40]:
#T5: create new instance
Profile = namedtuple('Profile', 'blood_group latitude longitude birthdate age')
new_values = ['AB+', 123.45, 67.8, date(1979, 2, 22), 32456]
time_nt, _ = nt_create_new_instance(Profile, new_values)
time_dict, _ = dict_create_new_instance(new_values)

nt_create_new_instance() average running_time: 773.928msec for 1000000 repetitions
dict_create_new_instance() average running_time: 1221.181msec for 1000000 repetitions


In [41]:
assert(time_nt < time_dict), 'Create New Instance: Dictionaires are faster than NamedTuples!'

In [42]:
def calc_open_value(stocks, weights):
    '''
    Computes and returns the opening value of the stock exchange
    '''
    *_, open, high, close = list(zip(*stocks))
    open_values = []
    # Iterate through each stock, multiply the its opening and weightage
    # Sum of the above gives the market opening value
    for index, num in enumerate(open):
        open_values.append(num*weights[index])
    return sum(open_values)

def calc_high_value(stocks, weights):
    '''
    Computes and returns the highest value for the day of the stock exchange
    '''
    *_, open, high, close = list(zip(*stocks))
    high_values = []
    # Iterate through each stock, compute the difference between its highest and opening value
    # Multiply the difference with the weight of the stock
    # Sum of the above with the market_open_value of the day gives the market highest point
    for index in range(len(open)):
        diff = high[index]-open[index]
        high_values.append(diff*weights[index])
    open_values = calc_open_value(stocks, weights)
    return sum(high_values) + open_values

def calc_close_value(stocks, weights):
    '''
    Computes and returns the closing value of the stock exchange
    '''
    *_, open, high, close = list(zip(*stocks))
    close_values = []
    # Iterate through each stock, compute the difference between its closing and opening value
    # Multiply the difference with the weight of the stock
    # Sum of the above with the market_open_value of the day gives the market closure value
    for index in range(len(open)):
        diff = close[index]-open[index]
        close_values.append(diff*weights[index])
    open_values = calc_open_value(stocks, weights)
    return sum(close_values) + open_values

In [43]:
def get_market_momentum(stocks, weights):
    '''
    Function to track market momentum
    Given the current values of Stocks, the function computes whether market is going upwards or downwards
    Returns the gain/loss adjusted to 2 decimal points
    '''
    market_open = calc_open_value(stocks, weights)
    market_close = calc_close_value(stocks, weights)
    diff = (market_close - market_open)
    if(diff > 0):
        return f'Markets gained {diff:.2f} points'
    elif(diff == 0):
        return f'Markets are continuing study at {market_open:.2f} points'
    else:
        return f'Markets lost {diff:.2f} points'

In [44]:
def create_stock_exchange(num_stocks=1, stock_values=None, weight_values = None):
    '''
    Function to create stock exchange with the number of stocks requested
    Each stock contains name, symbol, open, high, close values for the trading day
    Random weights are assigned to each company and all the weights ensured to sum up to 1
    The function returns the stocks and the associated weights to the caller
    '''
    def get_stock_info(faker):
        '''
        Uses faker library to generate company names
        Symbol name is choosen as the first part of the company name(Split using spaces)
        Open:
            Indicates the open value of the stock for the given day
            Ranges from 1 to 1500
        High:
            Indicates the highest value the stock touched for the given day
            Ranges from (open to 2000) (or) equal to open
            These two sets of values are used randomly to simulate stock going upwards, downwards
        Close:
            Indicates the closing value of the stock for the given day
            Ranges from (1 to open) (or) equal to high
            These two sets of values are used randomly to simulate stock going upwards, downwards
        The function returns a namedtuple Stock with all the necessary values loaded
        '''
        Stock = namedtuple('Stock', 'name symbol open high close')
        name = faker.company()
        symbol = name.split()[0]
        open = randint(1, 1500)
        if(choice([0,1])):
            high = randint(open, 2000)
            close = high
        else:
            high = open
            close = randint(1, open)
        return Stock(name, symbol, open, high, close)

    def generate_stock_prices(num_stocks=1):
        '''
        This function generates number of stocks based on input argument (default value = 1)
        Each ticker is nothing a but a company stock and will be added to the list of stocks

        The function returns a list with each element is a namedtuple of type 'Stock'
        Check the function get_stock_info() to get more information on 'Stock' namedtuple
        '''
        stocks = []
        fake = Faker()
        Faker.seed(101)
        for _ in range(num_stocks):
            ticker = get_stock_info(fake)
            stocks.append(ticker)
        return stocks

    def generate_stock_weights(num_stocks=1):
        '''
        Function to generate (random) weightage to each listed company in the stock exchange
        Ensured that all the weights sum up to 1
        '''
        w1 = [uniform(0.01, 0.4) for _ in range(num_stocks)]
        weights = [(num/sum(w1)) for num in w1]
        return weights
    # If caller doesn't pass any pre-loaded information for stocks, generate them
    if stock_values is None:
        stocks = generate_stock_prices(num_stocks)
    else:
        # Setting with the values shared by the caller
        stocks = stock_values

    # If caller doesn't pass any pre-loaded information for weights, generate them
    if weight_values is None:
        weights = generate_stock_weights(num_stocks)
    else:
        # Setting with the values shared by the caller
        weights = weight_values
    return stocks, weights

In [45]:
num_stocks = 100

In [46]:
stocks, weights = create_stock_exchange(num_stocks)
stocks[:10], weights[:10]

([Stock(name='Lee-Sanford', symbol='Lee-Sanford', open=331, high=331, close=95),
  Stock(name='Mcdonald, Alvarado and Lewis', symbol='Mcdonald,', open=1354, high=1597, close=1597),
  Stock(name='Roberts PLC', symbol='Roberts', open=810, high=810, close=111),
  Stock(name='Maddox Inc', symbol='Maddox', open=416, high=1420, close=1420),
  Stock(name='Davis PLC', symbol='Davis', open=530, high=745, close=745),
  Stock(name='Ruiz-Nguyen', symbol='Ruiz-Nguyen', open=324, high=337, close=337),
  Stock(name='Powell-Clark', symbol='Powell-Clark', open=481, high=1523, close=1523),
  Stock(name='Torres-Rodriguez', symbol='Torres-Rodriguez', open=267, high=451, close=451),
  Stock(name='Frazier, Ortega and Khan', symbol='Frazier,', open=813, high=813, close=57),
  Stock(name='Gray-Henderson', symbol='Gray-Henderson', open=1414, high=1793, close=1793)],
 [0.011798294709545194,
  0.005072437043092393,
  0.0010376755766624102,
  0.012598466194583996,
  0.0007249423314254227,
  0.013235138532110962,


In [47]:
*_, open, high, close = list(zip(*stocks))
for index in range(len(stocks)):
    #T3: Close should always be <= high value for the day
    assert(close[index] <= high[index])
    #T4: High should always be >= open value for the day
    assert(high[index] >= open[index])
    #T5: Open should always be > 0
    assert(open[index] > 0)

In [48]:
def generate_random_stocks(for_gain = True):
    '''
    Dummy function to create 3 stocks with their open, high, close and their weights of the stock exchange
    for_gain = True
        The open, high, close -> Chosen such a way that, the market is upwards
    for_gain = False
        The open, high, close -> Chosen such a way that, the market is downwards
    '''
    Stock = namedtuple('Stock', 'name symbol open high close')
    if for_gain:
        tsai = Stock('TSAI', 'TSA', 1400, 1560, 1500)
        skunkworks = Stock('SKUNKWORKS', 'SKW', 2000, 2300, 2050)
        inkers = Stock('INKERS', 'INK', 1675, 1987, 1900)
    else:
        tsai = Stock('TSAI', 'TSA', 1400, 1400, 1200)
        skunkworks = Stock('SKUNKWORKS', 'SKW', 2000, 2000, 1850)
        inkers = Stock('INKERS', 'INK', 1675, 1675, 1500)

    weights = [0.45, 0.35, 0.2]
    return [tsai, skunkworks, inkers], weights

In [49]:
stocks, weights = generate_random_stocks(for_gain=True)
stocks_new, weights_new = create_stock_exchange(3, stocks, weights)

In [50]:
stocks_new, weights_new

([Stock(name='TSAI', symbol='TSA', open=1400, high=1560, close=1500),
  Stock(name='SKUNKWORKS', symbol='SKW', open=2000, high=2300, close=2050),
  Stock(name='INKERS', symbol='INK', open=1675, high=1987, close=1900)],
 [0.45, 0.35, 0.2])

In [51]:
#T7: Market Gain Check
assert 'gain' in get_market_momentum(stocks_new, weights_new), 'Market should be gaining!'

In [52]:
stocks, weights = generate_random_stocks(for_gain=False)
stocks_new, weights_new = create_stock_exchange(3, stocks, weights)

In [53]:
stocks_new, weights_new

([Stock(name='TSAI', symbol='TSA', open=1400, high=1400, close=1200),
  Stock(name='SKUNKWORKS', symbol='SKW', open=2000, high=2000, close=1850),
  Stock(name='INKERS', symbol='INK', open=1675, high=1675, close=1500)],
 [0.45, 0.35, 0.2])

In [54]:
#T8: Market Loss Check
assert 'lost' in get_market_momentum(stocks_new, weights_new), 'Market is supposed to be in loss!'