## Generates dummy data

In [1]:
"""File to generate sample data for testing."""
from datetime import date, timedelta, datetime
import random
import math
import pandas as pd
import numpy as np
import plotly.graph_objects as go

In [11]:
from sample_data import type_list,value_sign,day,adequacy_list,choices,last_week_date, last_month_date,results_list, genotype_list

In [4]:
def week_day(n:int):
    """Return the English weekday name for index ``n`` (0 is "Monday").

    The index is reduced modulo 7, so 7 maps back to "Monday" and -1 maps
    to "Sunday".
    """
    names = (
        "Monday", "Tuesday", "Wednesday", "Thursday",
        "Friday", "Saturday", "Sunday",
    )
    position = n % 7
    return names[position]

## Generates the number of tests processed on a given date

In [5]:
### this function defines how busy each day of the week is
def busy_days(n:int):
    """Return the baseline number of tests for weekday index ``n``.

    Index 6 yields 0; every other value yields 70. ``number_of_tests``
    passes ``fecha.weekday()`` here, under which 6 is Sunday.
    """
    return 0 if n == 6 else 70
    
    
    

In [6]:
### this function defines how busy each month is
def busy_months(n:int):
    """Return the load multiplier applied to month number ``n``.

    Months 1, 2, 6 and 7 give 1.2, month 12 gives 0.8, and any other
    value gives the neutral multiplier 1.
    """
    if n in (1, 2, 6, 7):
        return 1.2
    if n == 12:
        return 0.8
    return 1
    
    

In [7]:
def number_of_tests(fecha):
    """Return how many tests to simulate for the date ``fecha``.

    The count is the weekday baseline from ``busy_days`` multiplied by the
    month factor from ``busy_months`` and by a random jitter drawn
    uniformly from [0.95, 1.05), truncated to an int.

    ``fecha`` must expose ``.weekday()`` and ``.month``.
    """
    weekday_load = busy_days(fecha.weekday())
    month_factor = busy_months(fecha.month)
    jitter = np.random.uniform(0.95, 1.05)
    return int(weekday_load * month_factor * jitter)


## Generate a new row given a date

In [8]:
def make_row(fecha):
    """Build a one-row DataFrame describing a single randomly generated test.

    ``fecha`` must expose ``.date()``; its result is stored in the 'day'
    column. The remaining columns are drawn with the ``random`` module from
    the option lists imported from ``sample_data``.

    Returns a DataFrame with the columns 'day', 'type', 'adequacy',
    'result', 'genotype', 'cytology' and 'hystology'.
    """
    def pick(options, weights):
        # Weighted draw of a single element.
        return random.choices(options, weights=weights)[0]

    # The draw order below is kept fixed so that a seeded `random` stream
    # produces the same sequence of rows.
    test_type = pick(type_list, [3, 4])
    adequacy_value = pick(adequacy_list, [5, 2, 1])
    result_value = pick(results_list, [4, 1, 1, 1, 1])
    genotype_value = random.choice(genotype_list)
    cytology_value = pick(value_sign, [4, 1]) + "cytology"

    # A follow-up value is only drawn when cytology came back positive.
    if cytology_value == "Positivecytology":
        hystology_value = pick(value_sign, [92, 8]) + "hystology"
    else:
        hystology_value = "Not prescribed"

    record = {
        'day': fecha.date(),
        'type': test_type,
        'adequacy': adequacy_value,
        'result': result_value,
        'genotype': genotype_value,
        'cytology': cytology_value,
        'hystology': hystology_value,
    }
    return pd.DataFrame([record])

## Choose the date range 

In [14]:
# Read "today" once so both bounds are derived from the same calendar day,
# even if this cell happens to run across midnight.
_today = date.today()
starting_date = _today - timedelta(days=366)  # 366 days before today
ending_date = _today + timedelta(days=20)  # 20 days after today

## Generate test_df

In [15]:
def generate_test_df(start=None, end=None):
    """Generate the synthetic per-test records for a range of days.

    For every day in the range, ``number_of_tests`` decides how many rows
    to create and ``make_row`` produces each of them.

    Parameters
    ----------
    start, end : optional
        Inclusive bounds passed to ``pd.date_range``. When omitted they
        default to the module-level ``starting_date`` and ``ending_date``,
        which matches the previous zero-argument behaviour.

    Returns
    -------
    pd.DataFrame
        One row per simulated test. If no test is generated at all, an
        empty frame with the expected columns is returned.
    """
    columns=['day', 'type', 'adequacy', 'result', 'genotype', 'cytology', 'hystology']
    if start is None:
        start = starting_date
    if end is None:
        end = ending_date

    # Collect the one-row frames first and concatenate once: calling
    # pd.concat inside the loop re-copies the accumulated frame on every
    # iteration, which is quadratic in the number of rows.
    frames = [
        make_row(day)
        for day in pd.date_range(start, end, freq='D')
        for _ in range(number_of_tests(day))
    ]
    if not frames:
        # pd.concat raises on an empty list, so return the empty schema.
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, axis=0, ignore_index=True)

In [16]:
# Build the synthetic per-test records by running generate_test_df().
Records_df=generate_test_df()

### GENERATE FREQUENCY DATA FRAME

In [19]:
def new_row_frequency(df,fecha):
    """Summarise the records of one day as a single-row frequency DataFrame.

    Parameters
    ----------
    df : pd.DataFrame
        Records table. Its first column is skipped (expected to be 'day');
        it must also contain the columns 'type', 'adequacy', 'cytology'
        and 'hystology'.
    fecha :
        Value compared against ``df["day"]`` to select the day's rows.

    Returns
    -------
    pd.DataFrame
        One row holding:
        * 'day' and 'All' (number of rows for that day);
        * one count per distinct value seen that day in every column after
          the first, keyed by the value itself;
        * one count per (type, adequacy), (type, cytology) and
          (type, hystology) pair of values seen that day, keyed by the
          concatenation ``type + value``. Pairs that never occur together
          get 0.
    """
    day_rows = df[df["day"] == fecha]
    counts = {"day": fecha, "All": day_rows.shape[0]}

    # Use the columns of the frame that was passed in. The previous version
    # read them from the global Records_df, which fails or silently uses
    # the wrong schema when the function is called with any other frame.
    for column in list(df.columns)[1:]:
        for value in day_rows[column].unique():
            counts[value] = int((day_rows[column] == value).sum())

    # Cross counts of the test type against three other columns. The key
    # order (adequacy, cytology, hystology for each type) is unchanged.
    for test_type in day_rows["type"].unique():
        of_type = day_rows["type"] == test_type
        for column in ("adequacy", "cytology", "hystology"):
            for value in day_rows[column].unique():
                matches = of_type & (day_rows[column] == value)
                counts[test_type + value] = int(matches.sum())

    return pd.DataFrame([counts])

In [20]:
def make_frequency_df(df):
    """Build the per-day frequency table for the records in ``df``.

    Calls ``new_row_frequency`` once for every distinct value of
    ``df["day"]`` (in order of first appearance) and stacks the resulting
    one-row frames. Columns missing from a given day's row are filled with
    NaN by the concatenation.

    Returns an empty DataFrame when ``df`` contains no days.
    """
    # Collect all rows and concatenate once: calling pd.concat inside the
    # loop re-copies the accumulated frame for every day.
    frames = [new_row_frequency(df, fecha) for fecha in df["day"].unique()]
    if not frames:
        # pd.concat raises on an empty list.
        return pd.DataFrame()
    return pd.concat(frames, axis=0, ignore_index=True)

In [21]:
# Aggregate the generated records into one frequency row per day.
Frequency_df=make_frequency_df(Records_df)

In [22]:
# Preview the first rows of the frequency table.
Frequency_df.head()

Unnamed: 0,day,All,Conventional,Liquid based,Sat,Insat_NP,Insat_P,Negative,ASC-US,SCC,...,ConventionalNot prescribed,ConventionalNegativehystology,Liquid basedSat,Liquid basedInsat_NP,Liquid basedInsat_P,Liquid basedPositivecytology,Liquid basedNegativecytology,Liquid basedPositivehystology,Liquid basedNot prescribed,Liquid basedNegativehystology
0,2021-12-22,56,31,25,40,5,11,29,6,7,...,8,0.0,17,4,4,22,3,21,3,1.0
1,2021-12-23,56,30,26,35,7,14,27,6,4,...,6,1.0,17,3,6,18,8,17,8,1.0
2,2021-12-24,57,31,26,38,9,10,23,14,6,...,7,2.0,17,3,6,21,5,19,5,2.0
3,2021-12-25,55,26,29,35,9,11,30,3,2,...,6,2.0,16,6,7,21,8,20,8,1.0
4,2021-12-27,57,37,20,34,5,18,27,15,4,...,6,2.0,11,1,8,16,4,14,4,2.0


In [23]:
# Write the frequency table to 'Frequency.csv' (path relative to the working directory).
# NOTE(review): index=False is not passed, so the row index is also written as an
# unnamed first column — confirm that consumers of the file expect it.
Frequency_df.to_csv('Frequency.csv')