# CSS/IRL 3501 - Skills Task 1

### Python 201 Tutorials - for your reference

#### Tutorial 1.1: Basic Pandas
- DataFrame, Series
- select, add, drop rows and columns
- read and write dataframes

#### Tutorial 1.2: Sorting & Grouping Data
- sort_values()
- groupby()
- agg() statistics
- drop_duplicates()

#### Tutorial 1.3: Shaping & Merging Dataframes
- choosing fields to join on
- types of joins: left join, inner join, outer join
- labeling columns

#### Tutorial 1.4: Preparing Data Subsets for Visualization
- creating new variables
- creating visualization subsets by selecting columns, grouping and aggregating rows 

# Step 1 - open Merged.xlsx

In [24]:
import pandas as pd

In [25]:
# Load the Merged.xlsx workbook into a pandas dataframe, using the first
# spreadsheet row as the column names - refer to Python 201 Tutorial 1
merged = pd.read_excel(io="Merged.xlsx", header=0)

In [26]:
# A bare expression on the last line of a cell is rendered as the cell output:
# use it to eyeball the rows and columns that were just loaded.
merged

Unnamed: 0,Month,RoomType,NumberOfRooms,Reservations,DaysStayed,Bar_Plan,Bar_Actual,Food_Plan,Food_Actual,Room_Plan,Room_Actual,Total_Plan,Total_Actual
0,1,Club Deluxe King Room,28,135,456,25704,21888,64260,30271,214200,205200,304164,257359
1,1,Executive Suite,36,323,962,32400,44426,81000,105237,270000,288600,383400,438263
2,1,King Suite,44,633,1332,29568,48043,73920,121782,246400,266400,349888,436225
3,1,One-Bedroom Club Room,10,60,177,12240,10025,30600,14138,102000,106200,144840,130363
4,1,One-Bedroom Suite,6,40,93,9216,7296,23040,9343,76800,74400,109056,91039
...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,12,Executive Suite,36,231,830,31356,35451,78408,92230,261360,273900,371124,401581
68,12,King Suite,44,152,204,29040,1110,72600,2682,242000,44880,343640,48672
69,12,One-Bedroom Club Room,10,37,137,13440,7550,33600,18772,112000,95900,159040,122222
70,12,One-Bedroom Suite,6,47,99,12312,8640,30780,21078,102600,89100,145692,118818


# Step 2 - create new variables calculating Actual as a percent of Plan

In [27]:
# Derive a new column from two existing ones:
# bar revenue actually earned as a fraction of the planned bar revenue.
merged["Bar_Percent"] = merged["Bar_Actual"].div(merged["Bar_Plan"])

In [28]:
# Food revenue actually earned as a fraction of the planned food revenue.
merged["Food_Percent"] = merged["Food_Actual"].div(merged["Food_Plan"])

In [29]:
# Room revenue actually earned as a fraction of the planned room revenue.
merged["Room_Percent"] = merged["Room_Actual"].div(merged["Room_Plan"])

In [30]:
# Total revenue actually earned as a fraction of the planned total revenue.
merged["Total_Percent"] = merged["Total_Actual"].div(merged["Total_Plan"])

In [31]:
# Display the dataframe again to confirm the four *_Percent columns
# were appended after the original columns.
merged

Unnamed: 0,Month,RoomType,NumberOfRooms,Reservations,DaysStayed,Bar_Plan,Bar_Actual,Food_Plan,Food_Actual,Room_Plan,Room_Actual,Total_Plan,Total_Actual,Bar_Percent,Food_Percent,Room_Percent,Total_Percent
0,1,Club Deluxe King Room,28,135,456,25704,21888,64260,30271,214200,205200,304164,257359,0.851541,0.471071,0.957983,0.846119
1,1,Executive Suite,36,323,962,32400,44426,81000,105237,270000,288600,383400,438263,1.371173,1.299222,1.068889,1.143096
2,1,King Suite,44,633,1332,29568,48043,73920,121782,246400,266400,349888,436225,1.624831,1.647484,1.081169,1.246756
3,1,One-Bedroom Club Room,10,60,177,12240,10025,30600,14138,102000,106200,144840,130363,0.819036,0.462026,1.041176,0.900048
4,1,One-Bedroom Suite,6,40,93,9216,7296,23040,9343,76800,74400,109056,91039,0.791667,0.405512,0.968750,0.834791
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,12,Executive Suite,36,231,830,31356,35451,78408,92230,261360,273900,371124,401581,1.130597,1.176283,1.047980,1.082067
68,12,King Suite,44,152,204,29040,1110,72600,2682,242000,44880,343640,48672,0.038223,0.036942,0.185455,0.141637
69,12,One-Bedroom Club Room,10,37,137,13440,7550,33600,18772,112000,95900,159040,122222,0.561756,0.558690,0.856250,0.768498
70,12,One-Bedroom Suite,6,47,99,12312,8640,30780,21078,102600,89100,145692,118818,0.701754,0.684795,0.868421,0.815542


# Step 3 - prepare data for visualizations

### Visualization 1 - total percentages by month and room type

In [32]:
# Keep every row but only the columns Visualization 1 needs
# - refer to Python 201 Tutorial 1
visualization_columns = ["Month", "RoomType", "Total_Percent"]
v1 = merged.loc[:, visualization_columns]

In [33]:
# Display the Visualization 1 subset (one row per month and room type).
v1

Unnamed: 0,Month,RoomType,Total_Percent
0,1,Club Deluxe King Room,0.846119
1,1,Executive Suite,1.143096
2,1,King Suite,1.246756
3,1,One-Bedroom Club Room,0.900048
4,1,One-Bedroom Suite,0.834791
...,...,...,...
67,12,Executive Suite,1.082067
68,12,King Suite,0.141637
69,12,One-Bedroom Club Room,0.768498
70,12,One-Bedroom Suite,0.815542


In [34]:
# Save the Visualization 1 subset as an excel file; index=False leaves the
# dataframe's row labels out of the sheet - refer to Python 201 Tutorial 1
v1.to_excel(excel_writer="v1.xlsx", index=False)

### Visualization 2 - total percentages by month and revenue type (bar, food, room)

In [35]:
# Visualization 1 queried total percentages by month and room type.
# In Visualization 2, please query total percentages by month and revenue type (bar, food, room)
# Write this query out as an excel file, "v2.xlsx"

### Visualization 3 - bar, food, room and total percentages by month, for each room type

In [36]:
# List the distinct values in the RoomType column (a Series)
# - refer to Python 201 Tutorial 1
merged["RoomType"].unique()

array(['Club Deluxe King Room', 'Executive Suite', 'King Suite',
       'One-Bedroom Club Room', 'One-Bedroom Suite', 'Superior King Room'],
      dtype=object)

In [37]:
# Build the Visualization 3 subset for one room type - refer to Python 201 Tutorial 1:
#   - a list names the columns to keep
#   - a boolean Series marks the rows to keep
#   - .loc applies the row filter and the column list in a single step
visualization_columns = ["Month", "Total_Percent", "Bar_Percent", "Food_Percent", "Room_Percent"]
is_club_deluxe = merged["RoomType"] == "Club Deluxe King Room"
v3_clubdeluxe = merged.loc[is_club_deluxe, visualization_columns]
v3_clubdeluxe

Unnamed: 0,Month,Total_Percent,Bar_Percent,Food_Percent,Room_Percent
0,1,0.846119,0.851541,0.471071,0.957983
6,2,0.848689,0.869081,0.42545,0.973214
12,3,0.838212,1.099756,0.43665,0.927296
18,4,0.862296,1.132214,0.326372,0.990683
24,5,0.84421,0.933482,0.193964,1.028571
30,6,0.742091,0.795745,0.09903,0.928571
36,7,1.038673,1.155234,1.116704,1.001276
42,8,1.015496,1.010157,1.026767,1.012755
48,9,0.964911,0.876736,0.99093,0.967687
54,10,0.987393,0.951173,0.996089,0.98913


In [38]:
# melt reshapes a dataframe from wide to long: the non-id columns are stacked into a variable column and a value column - refer to Python 201 Tutorial 3
# pivot is the opposite function of melt: it expands a dataframe from long to wide - refer to Python 201 Tutorial 3
# Month stays as the identifier column. No value_vars are passed, so every
# other column (the four *_Percent columns) is stacked into rows.
idvars = ["Month"]
varname = "RevenueType"  # new column holding the former column names
valuename = "Percent"    # new column holding the former cell values
v3_clubdeluxe = v3_clubdeluxe.melt(
    id_vars=idvars,
    var_name=varname,
    value_name=valuename,
)
v3_clubdeluxe

Unnamed: 0,Month,RevenueType,Percent
0,1,Total_Percent,0.846119
1,2,Total_Percent,0.848689
2,3,Total_Percent,0.838212
3,4,Total_Percent,0.862296
4,5,Total_Percent,0.84421
5,6,Total_Percent,0.742091
6,7,Total_Percent,1.038673
7,8,Total_Percent,1.015496
8,9,Total_Percent,0.964911
9,10,Total_Percent,0.987393


In [39]:
# Save the long-format Club Deluxe King Room subset as an excel file,
# leaving the dataframe's row labels out of the sheet.
v3_clubdeluxe.to_excel(excel_writer="v3_clubdeluxe.xlsx", index=False)

In [40]:
# The code for Visualization 3 queried bar, food, room and total percentages by month for the Club Deluxe King Room.
# The v3_clubdeluxe dataframe was then written out as an excel file.
# Repeat the same process for the remaining five room types (2-6 below).
# Write out these five dataframes as five separate excel files.

# 1. 'Club Deluxe King Room' (already done above)
# 2. 'Executive Suite'
# 3. 'King Suite'
# 4. 'One-Bedroom Club Room'
# 5. 'One-Bedroom Suite'
# 6. 'Superior King Room'

### Visualization 4 - annual total percentages by room type

In [41]:
# Collapse the monthly rows into one row per room type by summing the
# planned and actual totals - refer to Python 201 Tutorial 2
grouping_columns = ["RoomType", "Total_Plan", "Total_Actual"]
annual_total = (
    merged[grouping_columns]
    .groupby("RoomType", as_index=False)
    .sum()
)

In [42]:
# Annual percentage = summed actual revenue / summed planned revenue,
# computed from the annual sums rather than from the monthly percentages.
annual_total["Total_Percent"] = annual_total["Total_Actual"].div(annual_total["Total_Plan"])
annual_total

Unnamed: 0,RoomType,Total_Plan,Total_Actual,Total_Percent
0,Club Deluxe King Room,4668216,4271245,0.914963
1,Executive Suite,3990420,4097124,1.02674
2,King Suite,2734732,1766965,0.64612
3,One-Bedroom Club Room,1996510,1726682,0.86485
4,One-Bedroom Suite,1563420,1363728,0.872272
5,Superior King Room,4024352,4015977,0.997919


In [43]:
# Keep every row but only the columns Visualization 4 needs
# - refer to Python 201 Tutorial 1
visualization_columns = ["RoomType", "Total_Percent"]
v4 = annual_total.loc[:, visualization_columns]
v4

Unnamed: 0,RoomType,Total_Percent
0,Club Deluxe King Room,0.914963
1,Executive Suite,1.02674
2,King Suite,0.64612
3,One-Bedroom Club Room,0.86485
4,One-Bedroom Suite,0.872272
5,Superior King Room,0.997919


In [44]:
# Save the Visualization 4 subset as an excel file; index=False leaves the
# dataframe's row labels out of the sheet - refer to Python 201 Tutorial 1
v4.to_excel(excel_writer="v4.xlsx", index=False)

### Visualization 5 - annual total percentages by revenue type (bar, food, room)

In [45]:
# Visualization 4 queried annual total percentages by room type.
# In Visualization 5, please query annual total percentages by revenue type (bar, food, room).
# Write this query out as an excel file, "v5.xlsx"

### Visualization 6 - annual room, bar, food percentages by room type

In [46]:
# Collapse the monthly rows into one row per room type by summing the actual
# and planned revenue of every revenue type - refer to Python 201 Tutorial 2
grouping_columns = [
    "RoomType",
    "Total_Actual", "Total_Plan",
    "Bar_Actual", "Bar_Plan",
    "Food_Actual", "Food_Plan",
    "Room_Actual", "Room_Plan",
]
annual_total = (
    merged[grouping_columns]
    .groupby("RoomType", as_index=False)
    .sum()
)

In [47]:
# For each revenue type, annual percentage = summed actual / summed plan.
# The tuple order fixes the order in which the new columns are appended.
for revenue_type in ("Total", "Bar", "Food", "Room"):
    actual = annual_total[revenue_type + "_Actual"]
    plan = annual_total[revenue_type + "_Plan"]
    annual_total[revenue_type + "_Percent"] = actual / plan
annual_total

Unnamed: 0,RoomType,Total_Actual,Total_Plan,Bar_Actual,Bar_Plan,Food_Actual,Food_Plan,Room_Actual,Room_Plan,Total_Percent,Bar_Percent,Food_Percent,Room_Percent
0,Club Deluxe King Room,4271245,4668216,380407,394492,669578,986244,3221260,3287480,0.914963,0.964296,0.678917,0.979857
1,Executive Suite,4097124,3990420,359968,337212,888806,843048,2848350,2810160,1.02674,1.067483,1.054277,1.01359
2,King Suite,1766965,2734732,136114,231088,341441,577764,1289410,1925880,0.64612,0.589014,0.59097,0.669517
3,One-Bedroom Club Room,1726682,1996510,141852,168710,257290,421800,1327540,1406000,0.86485,0.840804,0.609981,0.944196
4,One-Bedroom Suite,1363728,1563420,104887,132120,215341,330300,1043500,1101000,0.872272,0.793877,0.651956,0.947775
5,Superior King Room,4015977,4024352,329156,340100,827371,850212,2859450,2834040,0.997919,0.967821,0.973135,1.008966


In [48]:
# Keep every row but only the columns Visualization 6 needs
# - refer to Python 201 Tutorial 1
visualization_columns = ["RoomType", "Total_Percent", "Bar_Percent", "Food_Percent", "Room_Percent"]
v6 = annual_total.loc[:, visualization_columns]
v6

Unnamed: 0,RoomType,Total_Percent,Bar_Percent,Food_Percent,Room_Percent
0,Club Deluxe King Room,0.914963,0.964296,0.678917,0.979857
1,Executive Suite,1.02674,1.067483,1.054277,1.01359
2,King Suite,0.64612,0.589014,0.59097,0.669517
3,One-Bedroom Club Room,0.86485,0.840804,0.609981,0.944196
4,One-Bedroom Suite,0.872272,0.793877,0.651956,0.947775
5,Superior King Room,0.997919,0.967821,0.973135,1.008966


In [49]:
# melt reshapes a dataframe from wide to long: the non-id columns are stacked into a variable column and a value column - refer to Python 201 Tutorial 3
# pivot is the opposite function of melt: it expands a dataframe from long to wide - refer to Python 201 Tutorial 3
# RoomType stays as the identifier column. No value_vars are passed, so every
# other column (the four *_Percent columns) is stacked into rows.
idvars = ["RoomType"]
varname = "RevenueType"  # new column holding the former column names
valuename = "Percent"    # new column holding the former cell values
v6 = v6.melt(
    id_vars=idvars,
    var_name=varname,
    value_name=valuename,
)
v6

Unnamed: 0,RoomType,RevenueType,Percent
0,Club Deluxe King Room,Total_Percent,0.914963
1,Executive Suite,Total_Percent,1.02674
2,King Suite,Total_Percent,0.64612
3,One-Bedroom Club Room,Total_Percent,0.86485
4,One-Bedroom Suite,Total_Percent,0.872272
5,Superior King Room,Total_Percent,0.997919
6,Club Deluxe King Room,Bar_Percent,0.964296
7,Executive Suite,Bar_Percent,1.067483
8,King Suite,Bar_Percent,0.589014
9,One-Bedroom Club Room,Bar_Percent,0.840804


In [50]:
# Order the rows by room type, then by revenue type within each room type
# - refer to Python 201 Tutorial 2
v6 = v6.sort_values(by=["RoomType", "RevenueType"])
v6

Unnamed: 0,RoomType,RevenueType,Percent
6,Club Deluxe King Room,Bar_Percent,0.964296
12,Club Deluxe King Room,Food_Percent,0.678917
18,Club Deluxe King Room,Room_Percent,0.979857
0,Club Deluxe King Room,Total_Percent,0.914963
7,Executive Suite,Bar_Percent,1.067483
13,Executive Suite,Food_Percent,1.054277
19,Executive Suite,Room_Percent,1.01359
1,Executive Suite,Total_Percent,1.02674
8,King Suite,Bar_Percent,0.589014
14,King Suite,Food_Percent,0.59097


In [51]:
# Save the sorted Visualization 6 subset as an excel file; index=False leaves
# the dataframe's row labels out of the sheet - refer to Python 201 Tutorial 1
v6.to_excel(excel_writer="v6.xlsx", index=False)