## Load data

In [None]:
import pandas as pd

# Load data from excel
# load all sheets with sheet_name=None
# mark DATE as date type
sheets = pd.read_excel("MoMo-Case-challenge-for-DA.xlsx", sheet_name=None)

# transDf contain the data of `Transactions` table
transDf = sheets["Data. Transactions"]
transDf['DATE'] = pd.to_datetime(transDf['DATE'])

# transDf contain the data of `Loyalty Points` table
pointDf = sheets["Data. Loyalty Points"]

# benefitDf contain the data of `Loyalty Benefits` table
benefitDf = sheets["Data. Loyalty benefits"]

pointDf.head()

Create Ranking table:

In [None]:
rankDf = pd.DataFrame({'Class_ID': [1, 2, 3, 4],
        'Rank_name': ['STANDARD', 'SILVER', 'GOLD', 'DIAMOND'],
        'Loyalty_Points': [999, 1999, 4999, 5000]})
rankDf.head()

## Normalize data

Convert `Point Mechanism` to number:

In [None]:
pointDf['Point Mechanism'] = pd.to_numeric(pointDf['Point Mechanism'].str.replace(' points/1000VND GMV', ''), errors='coerce')
pointDf.head()

Convert `Maximum Point Per Trans` to number:

In [None]:
pointDf['Maximum Point Per Trans'] = pd.to_numeric(pointDf['Maximum Point Per Trans'].str.replace(' points', ''), errors='coerce')
pointDf.head()

## Part 1

### Combined with the `Loyalty Points` table, add a column `Loyalty_Points` in the `Transactions` table with the given rules. 

In [None]:
mergedDf = pd.merge(transDf, pointDf, on='Service Group')
mergedDf['Loyalty_Points'] = mergedDf.apply(lambda x: min(x['GMV'] * x['Point Mechanism'], x['Maximum Point Per Trans']), axis=1)

transDf['Loyalty_Points'] = mergedDf['Loyalty_Points']
transDf.head()

### Create another table named `Loyalty Ranking` which must include columns named `Rank_name` and `Calculated_points` to calculate the Rank of each user on a daily basis.

In [None]:
def get_rank(point):
    ranks = rankDf[rankDf['Loyalty_Points'] < 3000].sort_values(by=['Loyalty_Points'], ascending=False)
    highest_rank = ranks.iloc[0]['Rank_name'] if len(ranks) >= 1 else rankDf[0]['Rank_name']
    return highest_rank

# add Calculated_points column
userRankDf = transDf.groupby(['DATE', 'User_id'])['Loyalty_Points'].sum().reset_index()
userRankDf.rename(columns={'Loyalty_Points': 'Calculated_points'}, inplace=True)

# add Rank_name column
userRankDf['Rank_name'] = userRankDf['Calculated_points'].apply(get_rank)

userRankDf.head()

### At the end of Mar 2022, how many users achieved the GOLD rank?


In [76]:
end_of_mar_2022 = pd.Timestamp('2022-03-31')
rank = 'GOLD'
result = userRankDf[(userRankDf['DATE'] <= end_of_mar_2022) & (userRankDf['Rank_name'] == rank)]
print(f'At the end of Mar 2022, {len(result)} users achieved the {rank} rank')


At the end of Mar 2022, 0 users achieved the GOLD rank
