In [1]:
# Analysis and plotting libraries used by the notebook.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme to matplotlib figures.
sns.set()
%matplotlib inline
# NOTE(review): this silences every warning for the whole session, including
# pandas warnings about assigning to a slice -- consider narrowing the filter.
import warnings
warnings.filterwarnings('ignore')
# Show the value of every top-level expression in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
from datetime import datetime as dt
import math

In [2]:
# Mount Google Drive at /content/drive so the input CSV files stored there
# can be read by the cells below (Colab-only dependency).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Folder containing the MOCK_DATA CSV files. The trailing slash matters:
# file names are appended to this string by plain concatenation.
pd_data_base_path = '/content/drive/MyDrive/Preppin Data/Data/'

In [4]:
# Read the twelve monthly extracts and tag each row with its month number.
# The month-1 file has no numeric suffix ('MOCK_DATA.csv'); months 2-12 are
# named 'MOCK_DATA-<month>.csv'.
list_of_dfs = []
for month in range(1, 13):
  suffix = '' if month == 1 else '-' + str(month)
  df = pd.read_csv(pd_data_base_path + 'MOCK_DATA' + suffix + '.csv')
  df['month'] = month
  list_of_dfs.append(df)

In [5]:
# Stack the monthly frames into one table; ignore_index=True replaces the
# per-file row labels with a single fresh 0..n-1 index.
df = pd.concat(list_of_dfs, ignore_index=True)

In [6]:
# Sanity check: number of rows loaded for each month.
df.month.value_counts()

1     1000
2     1000
3     1000
4     1000
5     1000
6     1000
7     1000
8     1000
9     1000
10    1000
11    1000
12    1000
Name: month, dtype: int64

In [7]:
# Use the full column name 'Market Capitalisation' from here on.
# Reassigning (instead of inplace=True) keeps the step chainable and avoids
# mutating a frame that other names may still reference.
df = df.rename(columns={'Market Cap':'Market Capitalisation'})

In [8]:
# Drop rows with no market capitalisation. The categorisation function below
# indexes into the value as a string, so missing values must be removed first.
df = df.dropna(subset=['Market Capitalisation'])

In [9]:
def purchase_price_categorisation(x):
  """Return the price band ('Low', 'Medium', 'High', 'Very High') for a purchase price.

  Bands (upper bound exclusive):
    Low        price < 25,000
    Medium     25,000 <= price < 50,000
    High       50,000 <= price < 75,000
    Very High  price >= 75,000

  Parameters
  ----------
  x : str or number
      Either the raw text such as '$74995.16' / '$74,995.16', or an
      already-numeric price. Accepting both keeps the cell re-runnable after
      the column has been converted to numbers.

  Returns
  -------
  str or None
      The band label, or None when the price is NaN.
  """
  if isinstance(x, str):
    # Remove the currency symbol and any thousands separators before parsing.
    price = float(x.replace('$', '').replace(',', ''))
  else:
    price = float(x)
  if math.isnan(price):
    return None
  # Whole-number cut-offs leave no gap between bands: the previous
  # '< 74999.99' / '>= 75000' pair returned None for prices such as 74999.99.
  if price < 25000:
    return 'Low'
  if price < 50000:
    return 'Medium'
  if price < 75000:
    return 'High'
  return 'Very High'

# Label every row with its purchase-price band. This runs while
# 'Purchase Price' still holds the raw text values; the numeric conversion
# happens in a later cell.
df['Purchase Price Categorisation'] = df['Purchase Price'].apply(purchase_price_categorisation)

In [10]:
def market_capitalisation_categorisation(x):
  """Return the market-cap band ('Small', 'Medium', 'Large', 'Huge') for a value.

  The text is converted to dollars first so the band depends on the actual
  amount rather than on which unit suffix was used:

    Small   value < $100M
    Medium  $100M <= value < $1B
    Large   $1B   <= value < $100B
    Huge    value >= $100B

  Parameters
  ----------
  x : str
      Market capitalisation such as '$19.12B' or '$38.27M'. Suffixes K, M, B
      and T are recognised (case-insensitive); a value with no suffix is read
      as plain dollars.

  Returns
  -------
  str
      The band label.
  """
  multipliers = {'K': 1e3, 'M': 1e6, 'B': 1e9, 'T': 1e12}
  text = x.strip().lstrip('$').replace(',', '')
  suffix = text[-1:].upper()
  if suffix in multipliers:
    dollars = float(text[:-1]) * multipliers[suffix]
  else:
    dollars = float(text)
  # Compare the normalised amount so that e.g. '$0.5B' is Medium, not Large,
  # and '$1500M' is Large, not Medium.
  if dollars < 100e6:
    return 'Small'
  if dollars < 1e9:
    return 'Medium'
  if dollars < 100e9:
    return 'Large'
  return 'Huge'

# Label every row with its market-cap band. Rows with a missing
# 'Market Capitalisation' were dropped earlier, so each value is text the
# function can parse.
df['Market Capitalisation Categorisation'] = df['Market Capitalisation'].apply(market_capitalisation_categorisation)

In [11]:
# Convert 'Purchase Price' from text to a number: strip '$' and ',' characters,
# parse the remainder, and round to two decimal places.
price_text = df['Purchase Price'].str.replace('[$,]', '', regex=True)
df['Purchase Price'] = pd.to_numeric(price_text).round(2)

In [12]:
# Number the purchases inside each (month, price band, market-cap band) group,
# most expensive first. cumcount() starts at 0, so 'Rank' is zero-based here.
group_cols = ['month', 'Purchase Price Categorisation', 'Market Capitalisation Categorisation']
price_ordered = df.sort_values(group_cols + ['Purchase Price'], ascending=[True, True, True, False])
within_group_rank = price_ordered.groupby(group_cols).cumcount().to_frame(name='Rank')
# The rank frame keeps df's index labels, so joining index-to-index attaches
# each rank to its own row; the index is then reset to 0..n-1.
mask_df = (
    within_group_rank
    .merge(df, left_index=True, right_index=True)
    .reset_index(drop=True)
)

In [13]:
# Inspect the ranked frame (pandas truncates the display for long frames).
mask_df

Unnamed: 0,Rank,id,first_name,last_name,Ticker,Sector,Market,Stock Name,Market Capitalisation,Purchase Price,month,Purchase Price Categorisation,Market Capitalisation Categorisation
0,0,966,Artemas,Franzini,A,Capital Goods,NYSE,"Agilent Technologies, Inc.",$19.12B,74995.16,1,High,Large
1,1,425,Bernetta,Garshore,OSK,Capital Goods,NYSE,Oshkosh Corporation,$5.07B,74935.82,1,High,Large
2,2,715,Jeanne,Ugolini,PLAY,Consumer Services,NASDAQ,"Dave & Buster's Entertainment, Inc.",$2.84B,74621.72,1,High,Large
3,3,868,Angelika,Shurman,DOC,Consumer Services,NYSE,Physicians Realty Trust,$3.26B,74155.03,1,High,Large
4,4,54,Jany,Hancke,CSOD,Technology,NASDAQ,"Cornerstone OnDemand, Inc.",$2.09B,74079.42,1,High,Large
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10104,37,386,Godard,Aloshechkin,AZRX,,NASDAQ,"AzurRx BioPharma, Inc.",$38.27M,78218.19,12,Very High,Small
10105,38,283,Caresse,Pyrke,RELY,Consumer Non-Durables,NASDAQ,"Real Industry, Inc.",$90.88M,78113.35,12,Very High,Small
10106,39,560,Bren,Stiller,ELON,Technology,NASDAQ,Echelon Corporation,$24.7M,77270.32,12,Very High,Small
10107,40,253,Kiri,Chopping,CTIB,Basic Industries,NASDAQ,CTI Industries Corporation,$21.03M,76885.36,12,Very High,Small


In [14]:
# Keep the five highest-priced purchases of every
# (month, price band, market-cap band) group and shape the final output.
# 'Rank' in mask_df is zero-based, so ranks 0-4 are kept and shifted to 1-5.
columns = ['Market Capitalisation Categorisation', 'Purchase Price Categorisation', 'month', 'Ticker', 'Sector', 'Market', 'Stock Name', 'Market Capitalisation', 'Purchase Price', 'Rank']
# .loc + .assign builds a new frame instead of writing a column into a
# filtered slice of mask_df (chained assignment), so mask_df is never touched
# and the result does not depend on view-vs-copy behaviour.
df = (
    mask_df.loc[mask_df['Rank'] < 5]
    .assign(Rank=lambda top: top['Rank'] + 1)
    .loc[:, columns]
)

In [15]:
# Preview the first rows of the final output.
df.head()

Unnamed: 0,Market Capitalisation Categorisation,Purchase Price Categorisation,month,Ticker,Sector,Market,Stock Name,Market Capitalisation,Purchase Price,Rank
0,Large,High,1,A,Capital Goods,NYSE,"Agilent Technologies, Inc.",$19.12B,74995.16,1
1,Large,High,1,OSK,Capital Goods,NYSE,Oshkosh Corporation,$5.07B,74935.82,2
2,Large,High,1,PLAY,Consumer Services,NASDAQ,"Dave & Buster's Entertainment, Inc.",$2.84B,74621.72,3
3,Large,High,1,DOC,Consumer Services,NYSE,Physicians Realty Trust,$3.26B,74155.03,4
4,Large,High,1,CSOD,Technology,NASDAQ,"Cornerstone OnDemand, Inc.",$2.09B,74079.42,5


In [16]:
# (rows, columns) of the final output.
df.shape

(831, 10)