# PEOPLE ANALYTICS PROJECT: ABSENTEEISM 

### The goal of this project is to apply certain Data Cleaning techniques to prepare the dataset, which will be further analyzed. Pandas and NumPy are the main libraries used in this project.



## IMPORT LIBRARIES & DATASET

In [55]:
import pandas as pd
import numpy as np

In [56]:
# Load the raw absenteeism records from the CSV file (path relative to the
# working directory) and preview the first 20 rows.
raw_csv_data = pd.read_csv('Absenteeism-data.csv')
raw_csv_data.head(20)

Unnamed: 0,ID,Reason for Absence,Date,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours
0,11,26,07/07/2015,289,36,33,239.554,30,1,2,1,4
1,36,0,14/07/2015,118,13,50,239.554,31,1,1,0,0
2,3,23,15/07/2015,179,51,38,239.554,31,1,0,0,2
3,7,7,16/07/2015,279,5,39,239.554,24,1,2,0,4
4,11,23,23/07/2015,289,36,33,239.554,30,1,2,1,2
...,...,...,...,...,...,...,...,...,...,...,...,...
15,24,14,17/07/2015,246,25,41,239.554,23,1,0,0,8
16,3,23,17/07/2015,179,51,38,239.554,31,1,0,0,2
17,3,21,27/07/2015,179,51,38,239.554,31,1,0,0,8
18,6,11,30/07/2015,189,29,33,239.554,25,1,2,2,8


In [57]:
# Work on a copy so that the raw data loaded from the CSV stays untouched.

df = raw_csv_data.copy()
df.head()

Unnamed: 0,ID,Reason for Absence,Date,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours
0,11,26,07/07/2015,289,36,33,239.554,30,1,2,1,4
1,36,0,14/07/2015,118,13,50,239.554,31,1,1,0,0
2,3,23,15/07/2015,179,51,38,239.554,31,1,0,0,2
3,7,7,16/07/2015,279,5,39,239.554,24,1,2,0,4
4,11,23,23/07/2015,289,36,33,239.554,30,1,2,1,2


## OVERVIEW OF THE DATASET

In [58]:
# Preferred display options: show every column, but truncate long frames to 10 rows.
pd.options.display.max_columns = None
pd.options.display.max_rows = 10

In [59]:
# Visual check of the whole frame: no row or column appears to be missing.
display(df)

Unnamed: 0,ID,Reason for Absence,Date,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours
0,11,26,07/07/2015,289,36,33,239.554,30,1,2,1,4
1,36,0,14/07/2015,118,13,50,239.554,31,1,1,0,0
2,3,23,15/07/2015,179,51,38,239.554,31,1,0,0,2
3,7,7,16/07/2015,279,5,39,239.554,24,1,2,0,4
4,11,23,23/07/2015,289,36,33,239.554,30,1,2,1,2
...,...,...,...,...,...,...,...,...,...,...,...,...
695,17,10,23/05/2018,179,22,40,237.656,22,2,2,0,8
696,28,6,23/05/2018,225,26,28,237.656,24,1,1,2,3
697,18,10,24/05/2018,330,16,28,237.656,25,2,0,0,8
698,25,23,24/05/2018,235,16,32,237.656,25,3,0,0,2


In [60]:
# For longer DataFrames, df.info() is a more reliable completeness check than reading the rows by eye.
# In this DataFrame every one of the 12 columns has 700 non-null values, so no feature has missing values.

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 700 entries, 0 to 699
Data columns (total 12 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   ID                         700 non-null    int64  
 1   Reason for Absence         700 non-null    int64  
 2   Date                       700 non-null    object 
 3   Transportation Expense     700 non-null    int64  
 4   Distance to Work           700 non-null    int64  
 5   Age                        700 non-null    int64  
 6   Daily Work Load Average    700 non-null    float64
 7   Body Mass Index            700 non-null    int64  
 8   Education                  700 non-null    int64  
 9   Children                   700 non-null    int64  
 10  Pets                       700 non-null    int64  
 11  Absenteeism Time in Hours  700 non-null    int64  
dtypes: float64(1), int64(10), object(1)
memory usage: 65.8+ KB


In [61]:
# Statistical approach to this exercise:
# predict absenteeism from work (the dependent variable), treating the other features as independent variables.

### Drop ID

In [62]:
# The ID column is dropped because it does not help the analysis:
# it is a label and does not carry numeric information.
df = df.drop(columns=['ID'])
df.head()

Unnamed: 0,Reason for Absence,Date,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours
0,26,07/07/2015,289,36,33,239.554,30,1,2,1,4
1,0,14/07/2015,118,13,50,239.554,31,1,1,0,0
2,23,15/07/2015,179,51,38,239.554,31,1,0,0,2
3,7,16/07/2015,279,5,39,239.554,24,1,2,0,4
4,23,23/07/2015,289,36,33,239.554,30,1,2,1,2


### Absence reason (grouped dummy)

In [63]:
# Start the analysis of 'Reason for Absence' by looking at its first values.
df['Reason for Absence'].head()

0    26
1     0
2    23
3     7
4    23
Name: Reason for Absence, dtype: int64

In [64]:
df['Reason for Absence'].max()

28

In [65]:
df['Reason for Absence'].min()

0

In [66]:
pd.unique(df['Reason for Absence'])

array([26,  0, 23,  7, 22, 19,  1, 11, 14, 21, 10, 13, 28, 18, 25, 24,  6,
       27, 17,  8, 12,  5,  9, 15,  4,  3,  2, 16])

In [67]:
len(df['Reason for Absence'].unique())

28

In [68]:
# Min value 0, max value 28, but only 28 unique values: the range 0-28 would hold 29 codes, so one code is missing.

sorted(df['Reason for Absence'].unique())

# Code 20 is missing.
# This column is categorical nominal (reason for the absence).

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28]

In [69]:
# Create one dummy (indicator) column per reason-for-absence code.

reason_columns = pd.get_dummies(df['Reason for Absence'])
reason_columns

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,21,22,23,24,25,26,27,28
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
3,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
695,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
696,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
697,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
698,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0


In [70]:
# Sanity check: every absence should have exactly one reason flagged, so each row of dummies must sum to 1.
# A row sum of 0 would reveal a missing reason; a sum above 1 would reveal several reasons on one row.

reason_columns['check'] = reason_columns.sum(axis=1)
reason_columns

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,21,22,23,24,25,26,27,28,check
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1
3,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
695,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
696,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
697,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
698,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1


In [71]:
reason_columns['check'].sum(axis=0)

700

In [72]:
reason_columns['check'].unique()

array([1])

In [73]:
# The helper 'check' column has served its purpose, so remove it again.
reason_columns = reason_columns.drop(columns=['check'])
reason_columns.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,21,22,23,24,25,26,27,28
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
3,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0


In [74]:
# Avoid multicollinearity among the dummies: drop_first=True removes the first
# dummy (reason 0), so the column for reason 0 is no longer present.

reason_columns = pd.get_dummies(df['Reason for Absence'], drop_first=True)
reason_columns.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,21,22,23,24,25,26,27,28
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0


In [75]:
# Too many dummy columns; it will be better to group them into a few defined classes.

df.columns.values

array(['Reason for Absence', 'Date', 'Transportation Expense',
       'Distance to Work', 'Age', 'Daily Work Load Average',
       'Body Mass Index', 'Education', 'Children', 'Pets',
       'Absenteeism Time in Hours'], dtype=object)

In [76]:
reason_columns.columns.values

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 21, 22, 23, 24, 25, 26, 27, 28])

In [77]:
# The raw reason codes are no longer needed in df once the dummies exist.
df = df.drop(columns=['Reason for Absence'])

In [78]:
# GROUP 1 (1 to 14): Disease group
# GROUP 2 (15 to 17): Pregnancy
# GROUP 3 (18 to 21): Poisoning (code 20 does not occur in the data)
# GROUP 4 (22 to 28): Light reasons for absence (medical consultation, dental appointment, etc.)

# .loc slices the columns by label, and both endpoints of each slice are included.
# Only the last expression of the cell is displayed.
reason_columns.loc[:,:14] # label-based
reason_columns.loc[:,15:17]
reason_columns.loc[:,18:21]
reason_columns.loc[:,22:].head()

Unnamed: 0,22,23,24,25,26,27,28
0,0,0,0,0,1,0,0
1,0,0,0,0,0,0,0
2,0,1,0,0,0,0,0
3,0,0,0,0,0,0,0
4,0,1,0,0,0,0,0


In [79]:
# Collapse every group of reason dummies into a single flag per row:
# the row-wise maximum is 1 as soon as any reason in the group is set.
# Each pair holds the first and last column label of a group (None = up to the last column).
reason_type_1, reason_type_2, reason_type_3, reason_type_4 = (
    reason_columns.loc[:, first_label:last_label].max(axis=1)
    for first_label, last_label in [(1, 14), (15, 17), (18, 21), (22, None)]
)

In [80]:
# Append the four grouped reason flags to df as new columns (axis=1 places them side by side).

df = pd.concat([df, reason_type_1, reason_type_2, reason_type_3, reason_type_4], axis = 1)
df.head()

Unnamed: 0,Date,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours,0,1,2,3
0,07/07/2015,289,36,33,239.554,30,1,2,1,4,0,0,0,1
1,14/07/2015,118,13,50,239.554,31,1,1,0,0,0,0,0,0
2,15/07/2015,179,51,38,239.554,31,1,0,0,2,0,0,0,1
3,16/07/2015,279,5,39,239.554,24,1,2,0,4,1,0,0,0
4,23/07/2015,289,36,33,239.554,30,1,2,1,2,0,0,0,1


In [81]:
# The four concatenated flags arrive with the default names 0, 1, 2, 3.
# Relabel all columns, naming the new ones Reason_1 ... Reason_4.

column_names = ['Date', 'Transportation Expense', 'Distance to Work', 'Age',
       'Daily Work Load Average', 'Body Mass Index', 'Education',
       'Children', 'Pets', 'Absenteeism Time in Hours', 'Reason_1',
       'Reason_2', 'Reason_3', 'Reason_4']
df.columns = column_names 

In [82]:
df.head()

Unnamed: 0,Date,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours,Reason_1,Reason_2,Reason_3,Reason_4
0,07/07/2015,289,36,33,239.554,30,1,2,1,4,0,0,0,1
1,14/07/2015,118,13,50,239.554,31,1,1,0,0,0,0,0,0
2,15/07/2015,179,51,38,239.554,31,1,0,0,2,0,0,0,1
3,16/07/2015,279,5,39,239.554,24,1,2,0,4,1,0,0,0
4,23/07/2015,289,36,33,239.554,30,1,2,1,2,0,0,0,1


In [83]:
df.columns.values

array(['Date', 'Transportation Expense', 'Distance to Work', 'Age',
       'Daily Work Load Average', 'Body Mass Index', 'Education',
       'Children', 'Pets', 'Absenteeism Time in Hours', 'Reason_1',
       'Reason_2', 'Reason_3', 'Reason_4'], dtype=object)

In [84]:
# New column order for df: the four reason flags come first, the other columns keep their order.

column_names_reordered = ['Reason_1', 'Reason_2', 'Reason_3', 'Reason_4', 'Date', 'Transportation Expense', 
                          'Distance to Work', 'Age', 'Daily Work Load Average', 
                          'Body Mass Index', 'Education', 'Children', 'Pets', 'Absenteeism Time in Hours']

column_names_reordered

['Reason_1',
 'Reason_2',
 'Reason_3',
 'Reason_4',
 'Date',
 'Transportation Expense',
 'Distance to Work',
 'Age',
 'Daily Work Load Average',
 'Body Mass Index',
 'Education',
 'Children',
 'Pets',
 'Absenteeism Time in Hours']

In [85]:
df = df[column_names_reordered]
df.head()

Unnamed: 0,Reason_1,Reason_2,Reason_3,Reason_4,Date,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours
0,0,0,0,1,07/07/2015,289,36,33,239.554,30,1,2,1,4
1,0,0,0,0,14/07/2015,118,13,50,239.554,31,1,1,0,0
2,0,0,0,1,15/07/2015,179,51,38,239.554,31,1,0,0,2
3,1,0,0,0,16/07/2015,279,5,39,239.554,24,1,2,0,4
4,0,0,0,1,23/07/2015,289,36,33,239.554,30,1,2,1,2


### Checkpoint with ID and Absence reason 

In [86]:
# Checkpoint: continue on a copy so the later steps do not modify df.
df_reason_mod = df.copy()

### Dates (format) + Month + Day of week

In [87]:
type(df_reason_mod['Date'])

pandas.core.series.Series

In [88]:
# Check the type of the values: they are strings, so they need to be converted to timestamps.
type(df_reason_mod['Date'][0])

str

In [89]:
# Parse the date strings as day/month/year so the column holds timestamps instead of text.
df_reason_mod['Date'] = pd.to_datetime(df_reason_mod['Date'], format='%d/%m/%Y')

In [90]:
df_reason_mod['Date']

0     2015-07-07
1     2015-07-14
2     2015-07-15
3     2015-07-16
4     2015-07-23
         ...    
695   2018-05-23
696   2018-05-23
697   2018-05-24
698   2018-05-24
699   2018-05-31
Name: Date, Length: 700, dtype: datetime64[ns]

In [91]:
# Now it is type timestamp
type(df_reason_mod['Date'][0])

pandas._libs.tslibs.timestamps.Timestamp

In [92]:
df_reason_mod.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 700 entries, 0 to 699
Data columns (total 14 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   Reason_1                   700 non-null    uint8         
 1   Reason_2                   700 non-null    uint8         
 2   Reason_3                   700 non-null    uint8         
 3   Reason_4                   700 non-null    uint8         
 4   Date                       700 non-null    datetime64[ns]
 5   Transportation Expense     700 non-null    int64         
 6   Distance to Work           700 non-null    int64         
 7   Age                        700 non-null    int64         
 8   Daily Work Load Average    700 non-null    float64       
 9   Body Mass Index            700 non-null    int64         
 10  Education                  700 non-null    int64         
 11  Children                   700 non-null    int64         
 12  Pets    

In [93]:
# Full timestamps are not convenient to work with as they are, so we extract the month first.

df_reason_mod['Date'][0]

Timestamp('2015-07-07 00:00:00')

In [94]:
df_reason_mod['Date'][0].month

7

In [95]:
df_reason_mod.shape[0]

700

In [96]:
# Month number of every date, as a plain list of ints.
# The vectorised .dt accessor replaces the manual loop over df_reason_mod['Date'][i]:
# that lookup is label-based and only works while the index is the default 0..n-1 range.
list_months = df_reason_mod['Date'].dt.month.tolist()

In [97]:
# Checking the list
len(list_months) # Output: 700 values, same as .shape[0]
list_months[:20]

[7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8]

In [98]:
# Store the extracted month numbers as a new 'Monthly Value' column.
df_reason_mod['Monthly Value'] = list_months
df_reason_mod.head()

Unnamed: 0,Reason_1,Reason_2,Reason_3,Reason_4,Date,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours,Monthly Value
0,0,0,0,1,2015-07-07,289,36,33,239.554,30,1,2,1,4,7
1,0,0,0,0,2015-07-14,118,13,50,239.554,31,1,1,0,0,7
2,0,0,0,1,2015-07-15,179,51,38,239.554,31,1,0,0,2,7
3,1,0,0,0,2015-07-16,279,5,39,239.554,24,1,2,0,4,7
4,0,0,0,1,2015-07-23,289,36,33,239.554,30,1,2,1,2,7


In [99]:
# Let's create a day-of-the-week column for the analysis (employees are probably taking Mondays and Fridays off).
# First try weekday() on a single value: the last row.

df_reason_mod['Date'][699].weekday()

3

In [100]:
def date_to_weekday(date_value):
    """Return the weekday number of ``date_value`` via its ``weekday()`` method.

    For ``datetime.date``, ``datetime.datetime`` and ``pandas.Timestamp``
    objects the result is 0 for Monday through 6 for Sunday.
    """
    weekday_number = date_value.weekday()
    return weekday_number

In [101]:
# Weekday number of every date, as a plain list of ints.
# The vectorised .dt accessor replaces the manual loop over df_reason_mod['Date'][i]:
# that lookup is label-based and only works while the index is the default 0..n-1 range.
day_of_week_list = df_reason_mod['Date'].dt.weekday.tolist()

In [102]:
day_of_week_list[:20]

[1, 1, 2, 3, 3, 4, 4, 4, 0, 0, 0, 1, 2, 2, 2, 4, 4, 0, 3, 2]

In [103]:
day_of_week_other_list = df_reason_mod['Date'].apply(date_to_weekday)
day_of_week_other_list

0      1
1      1
2      2
3      3
4      3
      ..
695    2
696    2
697    3
698    3
699    3
Name: Date, Length: 700, dtype: int64

In [104]:
# Both approaches work and agree: the element-wise comparison contains only True.
np.equal(day_of_week_list, day_of_week_other_list).unique()

array([ True])

In [105]:
df_reason_mod['Day of the Week'] = day_of_week_list

In [106]:
df_reason_mod.head()

Unnamed: 0,Reason_1,Reason_2,Reason_3,Reason_4,Date,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours,Monthly Value,Day of the Week
0,0,0,0,1,2015-07-07,289,36,33,239.554,30,1,2,1,4,7,1
1,0,0,0,0,2015-07-14,118,13,50,239.554,31,1,1,0,0,7,1
2,0,0,0,1,2015-07-15,179,51,38,239.554,31,1,0,0,2,7,2
3,1,0,0,0,2015-07-16,279,5,39,239.554,24,1,2,0,4,7,3
4,0,0,0,1,2015-07-23,289,36,33,239.554,30,1,2,1,2,7,3


### Checkpoint - ID, Absence reason and date features

In [107]:
df_reason_date_mod = df_reason_mod.copy()

### Remaining features & Education dummy (split in 2 groups)

In [108]:
# Notes on the remaining columns.

# Transportation Expense: transportation expense for reimbursement (rounded, in EUR)
# Distance to Work: distance (km) an individual must travel from home to work
# Age: rounded down
# Daily Work Load Average: float value, the average amount of time spent working per day, shown in minutes
# Body Mass Index: indicator of whether a person is underweight, normal weight, overweight or obese

In [109]:
# Education, Children, Pets --> all categorical, stored as integers.
# We need to transform the Education column into a dummy variable.

np.sort(df_reason_date_mod['Education'].unique())

array([1, 2, 3, 4])

In [110]:
# Education 1: High school.
# Education 2: Graduate
# Education 3: Postgraduate
# Education 4: Master or PhD

df_reason_date_mod['Education'].value_counts().sort_values(axis=0, ascending=False)

1    583
3     73
2     40
4      4
Name: Education, dtype: int64

In [111]:
# Education 1 has 583 rows against 117 for levels 2, 3 and 4 combined, so it makes little sense to keep
# 2-3-4 as separate categories. Instead, collapse Education into a single dummy:
# 0 = high school, 1 = more than a high-school degree.

df_reason_date_mod['Education'] = df_reason_date_mod['Education'].map( {1:0, 2:1, 3:1, 4:1} )

In [112]:
df_reason_date_mod['Education'].unique()

array([0, 1])

In [113]:
df_reason_date_mod['Education'].value_counts()

0    583
1    117
Name: Education, dtype: int64

## OUTPUT - CLEANED DATASET

In [114]:
df_cleaned = df_reason_date_mod.copy()
df_cleaned.head()

Unnamed: 0,Reason_1,Reason_2,Reason_3,Reason_4,Date,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours,Monthly Value,Day of the Week
0,0,0,0,1,2015-07-07,289,36,33,239.554,30,0,2,1,4,7,1
1,0,0,0,0,2015-07-14,118,13,50,239.554,31,0,1,0,0,7,1
2,0,0,0,1,2015-07-15,179,51,38,239.554,31,0,0,0,2,7,2
3,1,0,0,0,2015-07-16,279,5,39,239.554,24,0,2,0,4,7,3
4,0,0,0,1,2015-07-23,289,36,33,239.554,30,0,2,1,2,7,3
