In [1]:
from datetime import date, timedelta
import holidays
import pandas as pd
import csv
import random

In [2]:
def calendar(year,country='ES',subdiv=None):
    """
    returns a dataframe with three columns:
    dates: with format [YYYY-MM-DD], ascending
    weekend: with value 1 if it's Saturday or Sunday, otherwise it returns 0.
    holidays: with value 1 if it's public holidays, otherwise it returns 0.
    """
    calendar=pd.DataFrame()

    def daterange(year):
        calendar = pd.DataFrame()
        calendar['dates']=pd.date_range(f'{year}-01-01', f'{year}-12-31', freq='D')
        return calendar

    calendario = daterange(year)

    def weekends(calendar):
        calendar['weekday']= calendar['dates'].apply(lambda x: x.weekday())
        weekend = {0:0,1:0,2:0,3:0,4:0,5:1,6:1}
        calendar['weekend'] = calendar['weekday'].map(weekend)
        calendar.drop(columns='weekday',inplace=True)
        return calendar

    calendario = weekends(calendario)

    def public_holidays(calendar):
        es_cl_holidays = holidays.country_holidays(country,subdiv)
        calendar['holidays']=calendar['dates'].apply(lambda x: x in es_cl_holidays).map(int)
        return calendar
    calendario = public_holidays(calendario)

    def times(calendario):
        times=['Morning','Evening','Night']
        for i in times:
            calendario[i] = pd.Series(0)
        return calendario
    calendario = times(calendario)
    return calendario



In [3]:
calendario = calendar(2022,'ES','CL')
calendario['holidays'].value_counts()

0    353
1     12
Name: holidays, dtype: int64

In [4]:
calendario['Month'] = calendario['dates'].apply(lambda x: x.strftime('%b'))
calendario

Unnamed: 0,dates,weekend,holidays,Morning,Evening,Night,Month
0,2022-01-01,1,1,0.0,0.0,0.0,Jan
1,2022-01-02,1,0,,,,Jan
2,2022-01-03,0,0,,,,Jan
3,2022-01-04,0,0,,,,Jan
4,2022-01-05,0,0,,,,Jan
...,...,...,...,...,...,...,...
360,2022-12-27,0,0,,,,Dec
361,2022-12-28,0,0,,,,Dec
362,2022-12-29,0,0,,,,Dec
363,2022-12-30,0,0,,,,Dec


In [5]:
calendario.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 365 entries, 0 to 364
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   dates     365 non-null    datetime64[ns]
 1   weekend   365 non-null    int64         
 2   holidays  365 non-null    int64         
 3   Morning   1 non-null      float64       
 4   Evening   1 non-null      float64       
 5   Night     1 non-null      float64       
 6   Month     365 non-null    object        
dtypes: datetime64[ns](1), float64(3), int64(2), object(1)
memory usage: 20.1+ KB


In [6]:
def number_workers():
    worker= input("how many workers do you have? >>")
    try:
        worker = int(worker)
        return worker
    except ValueError:
        print("That's not a number!")

# Employees Dataframe

def df_workers(number_of_workers):

    def name_workers():
        name = input('How is your worker called >>')
        if name.strip().isdigit():
            print("That's not a name!")
        else:
            name=name.capitalize()
            return name

    def age_workers():
        age= input("How old is your worker? >>")
        try:
            age = int(age)
            return age
        except ValueError:
            print("That's not a number!")

    def experience_workers():
        experience= input("How many years of experience this person has? >>")
        try:
            experience = int(experience)
            return experience
        except ValueError:
            print("That's not a number!")

    def salary_workers():
        salary = input("What's the annual salary of your worker? (In €) >>")
        try:
            salary = float(salary)
            return salary
        except ValueError:
            print("That's not a number!")

    def times_workers():
        timer =[1,2,3,4,5,6]
        time = input("When does your worker works? >>\n"
                "1 for Morning\n"
                "2 for Evening\n"
                "3 for Night\n"
                "4 for Morning and Evening\n"
                "5 for Morning and Night\n"
                "6 for Evening and Night\n")
        try:
            time = int(time)
            if time in timer:
                return time
            else: print('Select a correct value')
        except ValueError:
            print("That's not a number!")

    def gender_workers():
        genders ={1:'Female',2:'Male',3:'Non-binary'}
        gender = input("What's the gender of your worker? >>\n"
                    "1 for Female\n"
                    "2 for Male\n"
                    "3 for Non-binary\n")
        try:
            gender = int(gender)
            if gender in genders:
                return genders[gender]
            else: print('Select a correct value')
        except ValueError:
            print("That's not a number!")

    def weekend_workers():
        weekends ={1:'Yes',2:'No'}
        weekend = input("Does your worker work during weekends? >>\n"
                    "1 for No\n"
                    "2 for Yes\n")
        try:
            weekend = int(weekend)
            if weekend in weekends:
                return weekend-1
            else: print('Select a correct value')
        except ValueError:
            print("That's not a number!")

    def holiday_workers():
        holidays ={1:'Yes',2:'No'}
        holiday = input("Does your worker work during public holidays? >>\n"
                    "1 for No\n"
                    "2 for Yes\n")
        try:
            holiday = int(holiday)
            if holiday in holidays:
                return holiday - 1
            else: print('Select a correct value')
        except ValueError:
            print("That's not a number!")

    def data_workers(number_of_workers):
        data={}
        for i in range(number_of_workers):
            name = name_workers()
            age = age_workers()
            experience = experience_workers()
            salary = salary_workers()
            schedule = times_workers()
            gender = gender_workers()
            weekend = weekend_workers()
            holiday = holiday_workers()
            data[i] = {'name': name,'age': age,'experience':experience,'salary':salary,'schedule':schedule,'gender':gender,'weekend':weekend,'holiday':holiday}
        return data
    df = data_workers(number_of_workers)
    df = pd.DataFrame.from_dict(df,orient='index')
    return df

In [7]:
data=pd.read_csv('/Users/monic/code/monicasainer/calendar/raw_data/Project.csv',delimiter=';',header=0,index_col=False)

In [8]:
data

Unnamed: 0,name,age,experience,salary,schedule,gender,weekend,holiday
0,Pablo,23,1,23000,1,Male,1,1
1,Paula,49,25,30000,3,Female,0,0
2,Laura,62,34,32000,1,Female,1,1
3,Carlos,56,32,31000,6,Male,0,1
4,Estefania,28,2,26000,4,Non-binary,0,0


In [9]:
worker_holidays = list(data[data['holiday']==1]['name'])
public_holidays=list(calendario[calendario['holidays']==1].index)
min_day = int(len(public_holidays)/len(worker_holidays))

In [10]:
worker_holidays
public_holidays
min_day

4

In [11]:
worker_holidays = list(data[data['holiday']==1]['name'])
public_holidays=list(calendario[calendario['holidays']==1].index)
min_day = len(public_holidays)/len(worker_holidays)
for i in public_holidays:
    name_selected = str(random.sample((worker_holidays),1))[2:-2]
    calendario['holidays'].iloc[i]=name_selected
for x in range(len(worker_holidays)):
    while calendario[calendario['holidays']==worker_holidays[x]]['holidays'].count()!=min_day:
         for i in public_holidays:
            name_selected = str(random.sample((worker_holidays),1))[2:-2]
            calendario['holidays'].iloc[i]=name_selected
calendario['holidays'].value_counts()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  calendario['holidays'].iloc[i]=name_selected
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  calendario['holidays'].iloc[i]=name_selected


0         353
Carlos      4
Pablo       4
Laura       4
Name: holidays, dtype: int64

In [16]:
print([calendario['holidays'].iloc[x] for x in public_holidays])

['Carlos', 'Pablo', 'Pablo', 'Pablo', 'Laura', 'Laura', 'Carlos', 'Laura', 'Laura', 'Carlos', 'Carlos', 'Pablo']


In [None]:
calendario[calendario['holidays']==Pablo]['holidays'].count()

In [None]:
name_selected = str(random.sample((holiday),1))[2:-2]

In [None]:
repetition = name_selected * 5

In [None]:
repetition_split=repetition.split()

In [None]:
repetition_split

In [None]:
calendario.loc[0,'holidays']

In [None]:
i=5
prior_names =[calendario['holidays'].iloc[[x]] for x in range(i-5,5)]

In [None]:
type(prior_names)

In [None]:
[name_selected]*5

In [None]:
while i<5:
        calendario.loc[i,'holidays']=name_selected
    else:   
        name_selected = str(random.sample((holiday),1))[2:-2]
        prior_names =[calendario['holidays'].iloc[[x]] for x in range(i-5,5)]
        if prior_names == [name_selected]* 5:
            holiday.remove(name_selected)
            new_name_selected = str(random.sample((holiday),1))[2:-2]
            calendario.loc[i,'holidays']=new_name_selected
            holiday.append(name_selected)
        else: calendario.loc[i,'holidays'] = name_selected