In [2]:
import numpy as np
from numpy.random import randint, choice
import pandas as pd
import string
import datetime
import linecache
import PySimpleGUI as sg

In [3]:
AD_PROB = {
    'autumn': (0.7, 0.2, 0.1),
    'winter': (0.4, 0.1, 0.5),
    'spring': (0.6, 0.3, 0.1),
    'summer': (0.5, 0.4, 0.1),
}
SEASONS = (
    'autumn',
    'winter',
    'spring',
    'summer'
)
MONTH_RANGE = {
    'autumn': (9, 92),
    'winter': (11, 90),
    'spring': (3, 91),
    'summer': (6, 91)
}
EMAILS = (
    'gmail.com',
    'mail.ru',
    'outlook.com',
    'yahoo.com',
    'student.spbu.ru',
    'yandex.ru'
)
CATEGORIES = (
    'apparel',
    'home-and-garden',
    'jewelery'
)
CATEGORIES_LENGTH = {
    'apparel': 20,
    'home-and-garden': 20,
    'jewelery': 20
}
ALPHABET = list(string.ascii_letters + string.digits)
YEAR = 2023
WEBSITES_NUM = 50
AD_DUR_COEFF = randint(20, 361)
SIZE = 50000

chosen_season = 'autumn'

In [4]:
def ip_is_correct(ip) -> bool:
    first, second, third, fourth = ip[0], ip[1], ip[2], ip[3]
    return not (first == 10 and (0 <= second <= 255 or 0 <= third <= 255 or 0 <= fourth <= 255) or
        first == 172 and (16 <= second <= 31 or 0 <= third <= 255 or 0 <= fourth <= 255) or
        first == 192 and second == 168 and (0 <= third <= 255 or 0 <= fourth <= 255) or
        first == 100 and (64 <= second <= 127 or 0 <= third <= 255 or 0 <= fourth <= 255))

In [5]:
def generate_email(EMAILS) -> string:
    return f"{''.join(choice(ALPHABET, size=randint(2, 10)))}@{choice(EMAILS, size=1)[0]}"

In [6]:
def generate_ip() -> string:
    ip = (randint(0, 255), randint(0, 255), randint(0, 255), randint(0, 255))
    while (not ip_is_correct(ip)):
        ip = (randint(0, 255), randint(0, 255), randint(0, 255), randint(0, 255))
    return f'{ip[0]}.{ip[1]}.{ip[2]}.{ip[3]}'

In [7]:
def generate_date(MONTH_RANGE, chosen_season) -> string:
    delta = datetime.timedelta(
        days=randint(MONTH_RANGE[chosen_season][1]),
        hours=randint(25),
        minutes=randint(61),
    )
    date = datetime.datetime(
        year=YEAR,
        month=MONTH_RANGE[chosen_season][0],
        day=randint(1, 31),
        hour=0,
        minute=0
    )
    return (date + delta).isoformat()

In [8]:
def generate_platform() -> string:
    return linecache.getline('data/websites.txt', randint(1, 51)).rstrip()

In [9]:
def generate_ad(CATEGORIES, AD_PROB, CATEGORIES_LENGTH, chosen_season) -> string:
    category = choice(CATEGORIES, size=1, p=AD_PROB[chosen_season])[0]
    return linecache.getline(f'data/{category}.txt', randint(CATEGORIES_LENGTH[category] + 1)).rstrip()

In [10]:
def generate_row(AD_DUR_COEFF, chosen_season) -> dict:
    ad_num = randint(1, 101)
    ad_time = ad_num * AD_DUR_COEFF
    row = {
        'email': generate_email(),
        'ip': generate_ip(),
        'platform': generate_platform(),
        'date': generate_date(chosen_season),
        'ad_num': ad_num,
        'ad_time': ad_time,
        'ad_type': generate_ad(chosen_season),
    }

    return row

In [11]:
def generate_file(SIZE) -> list:
    return [generate_row() for i in range(int(SIZE))]

In [12]:
def generate_excel() -> None:
    df = pd.DataFrame.from_records(generate_file(), columns=[
        'email',
        'ip',
        'platform',
        'date',
        'ad_num',
        'ad_time',
        'ad_type'
    ])
    df.to_excel('output.xlsx', index=False)

In [13]:
sg.theme('DarkPurple1')

def new_window(window, coeff=False):
    if window is not None:
        window.close()
    layout = [
    [sg.Push(), sg.Text('SPECIFY YOUR DATA', font=('Courier New', 16)), sg.Push()],
    [sg.Push(), sg.Text('num of rows:', font=('Courier New', 16)), sg.Input(size=(10, 10), font=('Courier New', 16), key='-size-'), sg.Push()],
    [sg.Push(), sg.Radio('random ad duration', "RAD1", default=(not coeff), font=('Courier New', 16), enable_events=True, key='-radio1-'),
    sg.Radio('specify ad duration', "RAD1", default=coeff, font=('Courier New', 16), key='-radio2-', enable_events=True), sg.Push()],
    [sg.Push(), sg.Text('ad duration:', visible=coeff, font=('Courier New', 16)),
    sg.Input(size=(6, 1), visible=coeff, key='-coeff-', font=('Courier New', 16)), sg.Push()],
    [sg.Push(), sg.Text('year:', font=('Courier New', 16)), sg.Slider(range=(2000, 2023), default_value=2000, orientation='horizontal', key='-year-'), sg.Push()],
    [sg.Push(), sg.Text('season:', font=('Courier New', 16)), sg.Combo(values=['autumn', 'winter', 'spring', 'summer'], default_value='autumn', font=('Courier New', 16), key='-season-'), sg.Push()],
    [sg.Push(), sg.Submit(key='btnSubmit', font=('Courier New', 16)), sg.Push()],
    ]

    return sg.Window('Specifying data', layout, size=(700,400), element_padding=15)

window = new_window(None)

def submit_is_correct(size, year, season, coeff, radio1):
    if size == '' or size is None or year == '' or year is None or season == '' or season is None:
        return False
    return (
        1 <= int(size) <= 50000
        and 2000 <= int(year) <= 2023
        and season in ['autumn', 'winter', 'spring', 'summer']
        and (((coeff == '' or coeff is None) and radio1) or ((coeff != '' and coeff is not None) and not radio1))
    )

while True:
    event, values = window.read()
    print(values)
    if event == "-radio1-":
        window = new_window(window, coeff=False)
    elif event == '-radio2-':
        window = new_window(window, coeff=True)
    elif event == 'btnSubmit':
        if submit_is_correct(values['-size-'], values['-year-'], values['-season-'], values['-coeff-'], bool(values['-radio1-'])):
            SIZE = int(values['-size-'])
            YEAR = values['-year-']
            if values['-radio2-'] and values['-coeff-'] != '':
                AD_DUR_COEFF = values['-coeff-']
            chosen_season = values['-season-']
            df = pd.DataFrame.from_records(generate_file(), columns=[
                'email',
                'ip',
                'platform',
                'date',
                'ad_num',
                'ad_time',
                'ad_type'
            ])
            print('GENERATING!')
            df.to_excel('output.xlsx', index=False)
            break
    elif event == sg.WIN_CLOSED: 
        break

window.close()

{'-size-': '10', '-radio1-': True, '-radio2-': False, '-coeff-': '', '-year-': 2004.0, '-season-': 'spring'}
{'-size-': '100', '-radio1-': True, '-radio2-': False, '-coeff-': '', '-year-': 2008.0, '-season-': 'spring'}
100


TypeError: integer argument expected, got float

: 