In [1]:
import datetime
import random
from itertools import chain

import numpy as np
import pandas as pd
from faker import Faker

In [2]:
new_games_df = pd.read_csv("data/games.csv", encoding= 'unicode_escape', sep = ";")

In [3]:
games_df = new_games_df[[ "details.name","details.yearpublished", 'details.playingtime', 'details.minage',  "details.minplayers", "details.maxplayers" , "details.description", "game.type"]]
games_df.rename(columns = {"details.name":"Name", "details.yearpublished": "Year Published", 'details.playingtime':"Playing Time", 'details.minage': "Min Age", "details.minplayers": "Min Players", "details.maxplayers": "Max Players", "details.description":"Description", "game.type":"Type"}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [4]:
games_df.head()

Unnamed: 0,Name,Year Published,Playing Time,Min Age,Min Players,Max Players,Description,Type
0,Die Macher,1986.0,240.0,14.0,3.0,5.0,Die Macher is a game about seven sequential po...,boardgame
1,Dragonmaster,1981.0,30.0,12.0,3.0,4.0,Dragonmaster is a trick-taking card game based...,boardgame
2,Samurai,1998.0,60.0,10.0,2.0,4.0,"Part of the Knizia tile-laying trilogy, Samura...",boardgame
3,Tal der Könige,1992.0,60.0,12.0,2.0,4.0,When you see the triangular box and the luxuri...,boardgame
4,Acquire,1964.0,90.0,12.0,3.0,6.0,"In Acquire, each player strategically invests ...",boardgame


### CONSTANTS

In [250]:
GAMES_AMOUNT = 50
CUSTOMERS_AMOUNT = 1000
EMPLOYEES_AMOUNT = 4
YEAR = 2022
PRICE_FOR_DAY = 10

### GAMES

In [251]:
games_tbl = games_df.sample(n = GAMES_AMOUNT)
games_tbl.insert(0, "game_id", np.arange(1, GAMES_AMOUNT+1))
games_tbl["Price"] = np.round(np.random.uniform(100, 200, GAMES_AMOUNT)) + 0.99
games_tbl.head()

Unnamed: 0,game_id,Name,Year Published,Playing Time,Min Age,Min Players,Max Players,Description,Type,Price
3816,1,Feudal Lord,1983.0,60.0,12.0,1.0,16.0,Feudal Lord is a Japanese-language game for up...,boardgame,167.99
12145,2,Zombies!!! The Card Game,2012.0,60.0,13.0,2.0,6.0,Game description from the publisher:&#10;&#10;...,boardgame,173.99
11695,3,18PA,2011.0,180.0,0.0,3.0,5.0,18PA is a 18xx game roughly covering the area ...,boardgame,193.99
4242,4,Mississippi Banzai,1990.0,180.0,12.0,2.0,2.0,MISSISSIPPI BANZAI is another in XTR's many al...,boardgame,171.99
9370,5,Race 3000,2009.0,20.0,7.0,2.0,4.0,Do you have what it takes to cross the finish ...,boardgame,153.99


### CUSTOMERS

In [252]:
eng_first_names_df = pd.read_csv("data/english_first_names.csv").sort_values(by = ["Rank"])
eng_last_names_df = pd.read_csv("data/english_last_names.csv", sep=";")
pl_first_names_w_df = pd.read_csv("data/polish_female_names.csv")
pl_first_names_m_df = pd.read_csv("data/polish_male_names.csv")
pl_last_names_w_df = pd.read_csv("data/polish_female_last_names.csv")
pl_last_names_m_df = pd.read_csv("data/polish_male_last_names.csv")

In [253]:
# most popular names and surnames
eng_first_names = eng_first_names_df["Child's First Name"][0:2000]
eng_last_names = eng_last_names_df["SURNAME"] # only 1000 most popular
pl_first_names_w = pl_first_names_w_df["IMIĘ PIERWSZE"][0:100]
pl_first_names_m = pl_first_names_m_df["IMIĘ PIERWSZE"][0:100]
pl_last_names_w= pl_last_names_w_df["Nazwisko aktualne"][0:500]
pl_last_names_m = pl_last_names_m_df["Nazwisko aktualne"][0:500]
customers_tbl = pd.DataFrame()

In [254]:
proportions = np.array([0.4, 0.25, 0.35]) # ALL_ENG, W_PL, M_PL
numbers = proportions * CUSTOMERS_AMOUNT
phone_numbers = random.sample(range(100000000, 999999999), CUSTOMERS_AMOUNT + EMPLOYEES_AMOUNT)

In [255]:
customers_tbl["customer_id"] = np.arange( 1, CUSTOMERS_AMOUNT+1)
customers_tbl["first_name"]= np.concatenate([np.random.choice(eng_first_names,int( numbers[0])), np.random.choice(pl_first_names_w,int( numbers[1])), np.random.choice(pl_first_names_m,int( numbers[2]))])
customers_tbl["last_name"]= np.concatenate([np.random.choice(eng_last_names,int( numbers[0])), np.random.choice(pl_last_names_w,int( numbers[1])), np.random.choice(pl_last_names_m,int( numbers[2]))])
customers_tbl["first_name"] = customers_tbl["first_name"].apply(str.capitalize)
customers_tbl["last_name"] = customers_tbl["last_name"].apply(str.capitalize)

In [256]:
fake = Faker()
birth_dates = [fake.date_between(start_date = "-50y", end_date = "-20y").strftime("%d.%m.%Y") for _ in range(CUSTOMERS_AMOUNT)]
customers_tbl["birth_date"] = birth_dates

In [257]:
def generate_email(row):
    return row["first_name"].lower()+"."+ row["last_name"].lower() + "@mail.com"

In [258]:
customers_tbl["email"] = customers_tbl.apply(lambda row: generate_email(row), axis=1)

In [259]:
customers_tbl["phone_number"] = phone_numbers[0:CUSTOMERS_AMOUNT]

In [260]:
customers_tbl.head()

Unnamed: 0,customer_id,first_name,last_name,birth_date,email,phone_number
0,1,Chloe,Strickland,04.08.1990,chloe.strickland@mail.com,592337330
1,2,Emma,Aguirre,03.09.1997,emma.aguirre@mail.com,272611469
2,3,Grace,Blackwell,26.02.1986,grace.blackwell@mail.com,931845189
3,4,Daniel,Hickman,17.08.1993,daniel.hickman@mail.com,872738347
4,5,Sarah,Mcdaniel,08.09.1996,sarah.mcdaniel@mail.com,679652825


## Employees

In [261]:
employees_tbl = pd.DataFrame()
women_amount = random.randint(1, EMPLOYEES_AMOUNT)
men_amount = EMPLOYEES_AMOUNT - women_amount
employees_tbl["employee_id"] = np.arange( 1, EMPLOYEES_AMOUNT+1)
employees_tbl["first_name"] = np.concatenate( [np.random.choice(pl_first_names_w,int(women_amount)), np.random.choice(pl_first_names_m,int( men_amount ))])
employees_tbl["last_name"] = np.concatenate( [np.random.choice(pl_last_names_w,int(women_amount)), np.random.choice(pl_last_names_m,int( men_amount))])
employees_tbl["email"] = employees_tbl.apply(lambda row: generate_email(row), axis=1)
employees_tbl["phone_number"] = phone_numbers[-EMPLOYEES_AMOUNT-1: -1]
employees_tbl["birth_date"] = [fake.date_between(start_date = "-50y", end_date = "-20y").strftime("%d.%m.%Y") for _ in range(EMPLOYEES_AMOUNT)]
employees_tbl["start_work_date"] = [fake.date_between(start_date = "-80y", end_date = "-2y").strftime("%d.%m.%Y") for _ in range(EMPLOYEES_AMOUNT)]

In [262]:
employees_tbl

Unnamed: 0,employee_id,first_name,last_name,email,phone_number,birth_date,start_work_date
0,1,OLGA,MATUSIAK,olga.matusiak@mail.com,941580814,04.12.1992,21.10.1946
1,2,DANUTA,ŚLUSARCZYK,danuta.ślusarczyk@mail.com,703494304,28.06.1976,11.04.1983
2,3,OLIWIA,JAGIEŁŁO,oliwia.jagiełło@mail.com,787148776,24.06.1991,06.05.1962
3,4,BOGUSŁAWA,KUREK,bogusława.kurek@mail.com,980527271,26.04.1981,24.10.1965


### Payoffs

Komunistycznie na razie wszyscy dostają tyle samo XD

In [263]:
payoff_tbl = pd.DataFrame()

## Turnieje

### Terminarz/ termines

Przy założeniu że jeden raz w miesiącu (np w pierwszy czwartek miesiąca)

In [264]:
termines_tbl = pd.DataFrame()
termines_tbl["tournament_id"] = range(1, 13)
first_thursdays = [(datetime.date(YEAR, month, 1) + datetime.timedelta(days=((4 - datetime.date(YEAR, month, 1).weekday()) % 7))).strftime("%d.%m.%y") for month in range(1, 13)]
termines_tbl["date"] = first_thursdays

In [265]:
termines_tbl.head()

Unnamed: 0,tournament_id,date
0,1,07.01.22
1,2,04.02.22
2,3,04.03.22
3,4,01.04.22
4,5,06.05.22


### Turnieje

Z gier które zostały wyznaczone dla sklepu losuję 5 gier, które będą grami turniejowymi. (typ = board game, max graczy >= 4)

In [266]:
#games_tbl[( games_tbl["Type"] == "boardgame") & ( games_tbl["Max Players"] >= 4) ]["game_id"].to_list()
tournament_games  = random.sample( games_tbl[( games_tbl["Type"] == "boardgame") & ( games_tbl["Max Players"] >= 4) ]["game_id"].to_list(), 5)

In [267]:
tournaments_tbl = pd.DataFrame()
tournaments_tbl["tournament_id"] = range(1, 13)
tournaments_tbl["game_id"] = random.choices(tournament_games, k = 12)
tournaments_tbl["max_players"] = 16 * 4
tournaments_tbl["entry_fee"] = 20
tournaments_tbl["prize"] = 150

In [268]:
tournaments_tbl.head()

Unnamed: 0,tournament_id,game_id,max_players,entry_fee,prize
0,1,1,64,20,150
1,2,1,64,20,150
2,3,2,64,20,150
3,4,42,64,20,150
4,5,42,64,20,150


### Wyniki

In [269]:
results_tbl = pd.DataFrame()
tournament_ids = [64 * [i] for i in tournaments_tbl["tournament_id"]]
results_tbl["tournament_id"] = [i for i in chain.from_iterable(tournament_ids)]
results_tbl["position"] = [place for place  in range(1,65)] * 12
results = [random.sample(customers_tbl["customer_id"].to_list(), k = 64) for _ in range(1, 13)]
results_tbl["customer_id"] = [i for i in chain.from_iterable(results)]
results_tbl.insert(0,"result_id", range(1,len(results_tbl)+1))
results_tbl.head()

Unnamed: 0,result_id,tournament_id,position,customer_id
0,1,1,1,513
1,2,1,2,324
2,3,1,3,858
3,4,1,4,204
4,5,1,5,981


### Rentals

In [270]:
def generate_list_with_occurrences(numbers, occurrences):
    result = [number  for number, occurrence in zip(numbers, occurrences) for _ in range(occurrence)]
    return result

# def generate_date_from_day_number(row):
#     return datetime.datetime.strptime(str(YEAR) + "-" + row["day_num"], "%Y-%j").strftime("%d.%m.%Y")

# def generate_return_date(row):
#     rental_date = datetime.datetime.strptime(row["rental_date"], "%d.%m.%Y")
#     return_date = rental_date  + datetime.timedelta(days = np.random.randint(1, 7) ) # maksymalnie tydzien 

#     return  return_date.strftime("%d.%m.%Y") if return_date.year == rental_date.year else None

# def rental_duration(row):
#     try:
#         rental_date = datetime.datetime.strptime(row["rental_date"], "%d.%m.%Y")
#         return_date = datetime.datetime.strptime(row["return_date"], "%d.%m.%Y")
#     except TypeError:
#         return None
#     return ( return_date - rental_date).days 

In [284]:
rentals_tbl = pd.DataFrame()
day_of_year = np.arange(1, pd.Timestamp(YEAR, 12, 31).dayofyear + 1)
rent_daily = [np.random.poisson(3) for _ in range(pd.Timestamp(YEAR, 12, 31).dayofyear)]
rentals_tbl["customer_id"] =  random.choices(customers_tbl["customer_id"].to_list(), k = np.sum(rent_daily) )
rentals_tbl["game_id"] = random.choices(range(1,GAMES_AMOUNT+1), k = np.sum(rent_daily) )
rentals_tbl["rent_day"]= generate_list_with_occurrences(day_of_year, rent_daily)
rentals_tbl["duration"] = np.random.poisson(3, rentals_tbl.shape[0]) + 1
rentals_tbl["return_day"] = rentals_tbl["rent_day"] + rentals_tbl["duration"]
#rentals_tbl["rental_date"] = rentals_tbl.apply(lambda row: generate_date_from_day_number(row), axis=1)
#rentals_tbl["return_date"] = rentals_tbl.apply(lambda row: generate_return_date(row), axis=1)

#rentals_tbl["duration"] = rentals_tbl.apply(lambda row: rental_duration(row), axis=1)
#rentals_tbl = rentals_tbl.drop("day_num", axis = 1)
rentals_tbl.sort_values(by = ["rent_day", "return_day"])
rentals_tbl.insert(0, "rental_id", np.arange(1, rentals_tbl.shape[0]+1))

In [285]:
rentals_tbl.head(10)

Unnamed: 0,rental_id,customer_id,game_id,rent_day,duration,return_day
0,1,460,27,1,4,5
1,2,359,11,1,7,8
2,3,26,47,2,4,6
3,4,514,14,2,4,6
4,5,714,36,2,3,5
5,6,322,31,2,4,6
6,7,56,20,2,5,7
7,8,660,38,2,2,4
8,9,533,47,3,4,7
9,10,13,3,3,4,7


In [296]:
rentals_tbl["inventory_id"] = np.repeat(None, rentals_tbl.shape[0])
inventory = {} # game_id : [inventory_id]
available = {} # game_id : [inventory_id]
will_return = {} # return day : {inventory_id}

inv_counter = 1

for day in range(1, 366):
    print(f"day:{day}")
    temp = rentals_tbl[rentals_tbl["rent_day"] == day ]
    games_needed = temp["game_id"]
    print(f"games needed: {games_needed.to_list()}")
    for game_id in games_needed:
        try : # if available  set inv_id
            inv_id = available[game_id].pop()
        except : # if not available add to inventory and set inv_id
            if game_id in inventory.keys():
                inventory[game_id].append(inv_counter)
            else:
                inventory[game_id] = [inv_counter]
                available[game_id] = []
            inv_id = inv_counter
            inv_counter += 1

        #set inv_id in rentals_tbl
        rentals_tbl.loc[(rentals_tbl["rent_day"] == day)&(rentals_tbl["game_id"] == game_id), "inventory_id"] = inv_id
        # add current inv_id to will return
        return_day = rentals_tbl.loc[(rentals_tbl["rent_day"] == day) & (rentals_tbl["inventory_id"] == inv_id), "return_day"].to_list()[0] 
        
        if return_day in will_return.keys():
            will_return[return_day].append((game_id, inv_id))
        else:
            will_return[return_day] = [(game_id, inv_id)]
    #move from will return to available

    try:
        print(f"returns today: {will_return[day]}")
        for key, value in enumerate( will_return[day]):
            print(key)
            game_id, inv_id = will_return[day][key]
            available[game_id].insert(0, inv_id)
    except:
        pass


    print(f"available: {available}")
    #print(f"current inventory: {inventory}")
    print(f"will return: {will_return}")

rentals_tbl.head(10)

day:1
games needed: [27, 11]
available: {27: [], 11: []}
will return: {5: [(27, 1)], 8: [(11, 2)]}
day:2
games needed: [47, 14, 36, 31, 20, 38]
available: {27: [], 11: [], 47: [], 14: [], 36: [], 31: [], 20: [], 38: []}
will return: {5: [(27, 1), (36, 5)], 8: [(11, 2)], 6: [(47, 3), (14, 4), (31, 6)], 7: [(20, 7)], 4: [(38, 8)]}
day:3
games needed: [47, 3, 34, 12]
available: {27: [], 11: [], 47: [], 14: [], 36: [], 31: [], 20: [], 38: [], 3: [], 34: [], 12: []}
will return: {5: [(27, 1), (36, 5)], 8: [(11, 2), (34, 11), (12, 12)], 6: [(47, 3), (14, 4), (31, 6)], 7: [(20, 7), (47, 9), (3, 10)], 4: [(38, 8)]}
day:4
games needed: [2, 15, 35]
returns today: [(38, 8)]
0
available: {27: [], 11: [], 47: [], 14: [], 36: [], 31: [], 20: [], 38: [8], 3: [], 34: [], 12: [], 2: [], 15: [], 35: []}
will return: {5: [(27, 1), (36, 5)], 8: [(11, 2), (34, 11), (12, 12)], 6: [(47, 3), (14, 4), (31, 6)], 7: [(20, 7), (47, 9), (3, 10), (15, 14), (35, 15)], 4: [(38, 8)], 12: [(2, 13)]}
day:5
games needed:

Unnamed: 0,rental_id,customer_id,game_id,rent_day,duration,return_day,inventory_id
0,1,460,27,1,4,5,1
1,2,359,11,1,7,8,2
2,3,26,47,2,4,6,3
3,4,514,14,2,4,6,4
4,5,714,36,2,3,5,5
5,6,322,31,2,4,6,6
6,7,56,20,2,5,7,7
7,8,660,38,2,2,4,8
8,9,533,47,3,4,7,9
9,10,13,3,3,4,7,10


In [299]:
np.unique(rentals_tbl["inventory_id"])

array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
       20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
       37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
       54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
       71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
       88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130], dtype=object)

In [390]:
inventory_rent_tbl = pd.DataFrame()
inventory_rent_tbl["game_id"] = generate_list_with_occurrences(range(1, GAMES_AMOUNT+1), inventory_amount)
inventory_rent_tbl["inventory_id"] = np.arange(1, inventory_rent_tbl.shape[0] + 1)
inventory_rent_tbl["type"] = np.repeat("R", inventory_rent_tbl.shape[0])

In [392]:
inventory_rent_tbl.head()

Unnamed: 0,game_id,inventory_id,type
0,1,1,R
1,1,2,R
2,1,3,R
3,1,4,R
4,1,5,R


### Sales

In [403]:
sales_tbl = pd.DataFrame()
day_of_year = np.arange(1, pd.Timestamp(YEAR, 12, 31).dayofyear + 1)
sold_daily = [np.random.poisson(4) for _ in range(pd.Timestamp(YEAR, 12, 31).dayofyear)]
sales_tbl["customer_id"] =  random.choices(customers_tbl["customer_id"].to_list(), k = np.sum(sold_daily) )
sales_tbl["game_id"] = random.choices(range(1,GAMES_AMOUNT+1), k = np.sum(sold_daily) )
sales_tbl["day_num"]= generate_list_with_occurrences(day_of_year, sold_daily)
sales_tbl["date"] = sales_tbl.apply(lambda row: generate_date_from_day_number(row), axis=1)
sales_tbl = sales_tbl.drop("day_num", axis = 1)

In [404]:
sales_tbl

Unnamed: 0,customer_id,game_id,day_num,date
0,970,28,1,01.01.2022
1,184,23,1,01.01.2022
2,949,74,1,01.01.2022
3,226,28,2,02.01.2022
4,351,81,2,02.01.2022
...,...,...,...,...
1455,375,73,364,30.12.2022
1456,568,50,364,30.12.2022
1457,906,71,364,30.12.2022
1458,331,80,365,31.12.2022
