In [78]:
# Practice Question 1:
# Problem: You are given a table of transactions, and your task is to write a Python function that calculates the total revenue after applying a discount rate based on the transaction amount.

# Discount Structure:
# If the transaction amount is less than $50, apply a discount of 1%.
# If the transaction amount is $50 or more, apply a discount of 0.5%.
# Write a function that applies the discount and returns the total revenue after applying the discount to each transaction. 

import pandas as pd

# Create a sample DataFrame
data = {
    'transaction_id': [1, 2, 3, 4, 5],
    'amount': [25.00, 50.00, 75.00, 150.00, 10.00]
}

df = pd.DataFrame(data)

In [4]:
df.head()

Unnamed: 0,transaction_id,amount
0,1,25.0
1,2,50.0
2,3,75.0
3,4,150.0
4,5,10.0


In [12]:
# APPROACH 1 - insert then update

def apply_discount_df(df, amount_column):
    """Return total revenue after the flat per-transaction discount.

    Amounts below 50 are multiplied by 0.99 (1% off); amounts of 50 or
    more are multiplied by 0.995 (0.5% off).

    Side effect: the per-row results are written to
    ``df['amount_discounted']`` on the DataFrame that was passed in.

    Parameters:
        df: DataFrame holding the transactions.
        amount_column: name of the column with the transaction amounts.

    Returns:
        The sum of the discounted amounts.
    """
    # insert: start every row at the 1% rate
    df['amount_discounted'] = df[amount_column].mul(0.99)
    # update: rows at or above 50 get the 0.5% rate instead
    at_least_50 = df[amount_column].ge(50)
    df.loc[at_least_50, 'amount_discounted'] = df[amount_column].mul(0.995)
    # total revenue across all rows
    return df['amount_discounted'].sum()

disc_rev = apply_discount_df(df, 'amount')
disc_rev                          

np.float64(308.275)

In [18]:
# APPROACH 2 - apply

# create function to apply discount
def apply_discount(x):
    """Return one transaction amount after its flat discount.

    Amounts below 50 get 1% off (``x * 0.99``); every other amount gets
    0.5% off (``x * 0.995``).

    Parameters:
        x: a single numeric transaction amount.

    Returns:
        The discounted amount. NaN input yields NaN.
    """
    if x < 50:
        return x * 0.99
    # Unconditional fallback rather than `elif x >= 50`: NaN fails both
    # comparisons, which used to fall off the end and return None.
    return x * 0.995

# create function to sum revenue after discount
def apply_discount_sum(df, amount_col):
    """Return total revenue after discounting every value in ``df[amount_col]``.

    Each amount is passed through ``apply_discount`` and the discounted
    values are summed. The DataFrame itself is not modified.

    Parameters:
        df: DataFrame holding the transactions.
        amount_col: name of the column with the transaction amounts.

    Returns:
        The sum of the discounted amounts.
    """
    # discount row by row, then total the result
    discounted = df[amount_col].apply(apply_discount)
    return discounted.sum()

disc_rev_2 = apply_discount_sum(df, 'amount')
disc_rev_2 


np.float64(308.275)

In [21]:
# APPROACH 3 - lambda
def apply_discount_df_l(df, amount_col):
    """Return total revenue after the flat discount, using a lambda.

    Amounts below 50 are multiplied by 0.99; all other amounts are
    multiplied by 0.995. The DataFrame itself is not modified.

    Parameters:
        df: DataFrame holding the transactions.
        amount_col: name of the column with the transaction amounts.

    Returns:
        The sum of the discounted amounts.
    """
    # lambda that picks the discounted value based on the amount
    discount = lambda amount: amount * 0.99 if amount < 50 else amount * 0.995
    discounted = df[amount_col].apply(discount)
    return discounted.sum()

disc_rev_3 = apply_discount_df_l(df, 'amount')
disc_rev_3 

np.float64(308.275)

In [22]:
# Practice Question 2:
# Problem: You are given a table of transactions, and your task is to write a Python function that calculates the total revenue after applying a discount rate based on the transaction amount.

# Discount Structure:
# If the transaction amount is less than $50, apply a discount of 1%.
# If the transaction amount is $50 or more, apply a discount of 0.5% on the first $50 and 1.5% on any amount above $50.
# Write a function that applies the discount and returns the total revenue after applying the discount to each transaction. 

import pandas as pd

# Create a sample DataFrame
data = {
    'transaction_id': [1, 2, 3, 4, 5],
    'amount': [25.00, 50.00, 75.00, 150.00, 10.00]
}

df = pd.DataFrame(data)

In [31]:
# APPROACH 1 - apply function

# write function that applies discount at the row level
def apply_discount(x):
    """Return one transaction amount after the split-rate discount.

    Amounts below 50 get 1% off. Otherwise the first 50 gets 0.5% off
    and the portion above 50 gets 1.5% off.

    Parameters:
        x: a single numeric transaction amount.

    Returns:
        The discounted amount. NaN input yields NaN.
    """
    if x < 50:
        return x * .99  # 1% discount
    # Unconditional fallback rather than `elif x >= 50`: NaN fails both
    # comparisons, which used to fall off the end and return None.
    # 0.5% discount on the first 50, 1.5% discount on the remainder
    return (50 * .995) + ((x - 50) * 0.985)

def discount_revenue(df, amount_col):
    """Return total discounted revenue for the transactions in ``df``.

    Every value in ``df[amount_col]`` is passed through ``apply_discount``
    and the discounted values are summed. The DataFrame is not modified.

    Parameters:
        df: DataFrame holding the transactions.
        amount_col: name of the column with the transaction amounts.

    Returns:
        The sum of the discounted amounts.
    """
    # row-level discount first, then the total
    discounted = df[amount_col].apply(apply_discount)
    return discounted.sum()

total_disc_rev = discount_revenue(df, 'amount') 
print(total_disc_rev)
                        


307.025


In [38]:
# APPROACH 2 - insert and update

def discount_revenue_via_update(df, amount_col):
    """Return total discounted revenue using an insert-then-update pattern.

    Every row first gets 1% off. Rows with an amount of 50 or more are
    then overwritten with 0.5% off the first 50 plus 1.5% off the rest.

    Side effect: the per-row results are written to
    ``df['amount_discounted']`` on the DataFrame that was passed in.

    Parameters:
        df: DataFrame holding the transactions.
        amount_col: name of the column with the transaction amounts.

    Returns:
        The sum of the discounted amounts.
    """
    # insert: 1% discount on every row
    df['amount_discounted'] = df[amount_col].mul(0.99)
    # update: rows at or above 50 use the split rates instead
    at_least_50 = df[amount_col].ge(50)
    first_50 = 50 * 0.995
    remainder = df[amount_col].sub(50).mul(0.985)
    df.loc[at_least_50, 'amount_discounted'] = first_50 + remainder
    # total revenue across all rows
    return df['amount_discounted'].sum()

total_disc_rev_2 = discount_revenue_via_update(df, 'amount') 
print(total_disc_rev_2)
                        

307.025


In [65]:
# Problem:
# Given a table of transactions (txn_data), each transaction belongs to a specific customer. Write a function to calculate the total amount of money made from these transactions after applying a discount based on the transaction amount. The discount rules are as follows:

# If the transaction amount is less than $50: Apply a 1% discount.
# If the transaction amount is between $50 and $100:
# Apply a 0.5% discount on the first $50.
# Apply a 1% discount on the amount between $50 and $100.
# If the transaction amount exceeds $100:
# Apply a 0.5% discount on the first $50.
# Apply a 1% discount on the amount between $50 and $100.
# Apply a 1.5% discount on the amount exceeding $100.
# After applying the discount, group the transactions by customer_id and calculate the total discounted amount for each customer.
import pandas as pd

data = {
    'customer_id': [1, 1, 2, 2, 3, 3, 3],
    'amount': [45, 60, 200, 75, 30, 150, 80]
}
df = pd.DataFrame(data)


In [75]:
# define function to apply discount based on conditions
def apply_discount(x):
    """Return one transaction amount after the three-tier discount.

    Tiers:
      * below 50: 1% off the whole amount;
      * 50 to 100 inclusive: 0.5% off the first 50, 1% off the rest;
      * above 100: 0.5% off the first 50, 1% off the next 50, and 1.5%
        off the portion above 100.

    Parameters:
        x: a single numeric transaction amount.

    Returns:
        The discounted amount. NaN input yields NaN.
    """
    if x < 50:  # If the transaction amount is less than $50: Apply a 1% discount.
        return x * 0.99
    if x <= 100:  # If the transaction amount is between $50 and $100:
        return (50 * 0.995) + ((x - 50) * 0.99)
    # If the transaction amount exceeds $100. This is an unconditional
    # fallback: NaN fails every comparison, which used to fall off the end
    # of the if/elif chain and return None.
    return (50 * 0.995) + (50 * 0.99) + ((x - 100) * 0.985)

def discount_df_by_customer(df, group_col, amount_col):
    """Return the total discounted amount per group (e.g. per customer).

    Side effect: the per-row results of ``apply_discount`` are written to
    ``df['amount_discounted']`` on the DataFrame that was passed in.

    Parameters:
        df: DataFrame holding the transactions.
        group_col: name of the column to group by.
        amount_col: name of the column with the transaction amounts.

    Returns:
        A DataFrame with one row per group: ``group_col`` plus the summed
        ``amount_discounted``.
    """
    # discount each transaction individually
    df['amount_discounted'] = df[amount_col].apply(apply_discount)
    # total the discounted amounts within each group
    per_group = df.groupby(group_col)['amount_discounted'].sum()
    # move the group key from the index back into a regular column
    return per_group.reset_index()
            
disc_rev_by_cust = discount_df_by_customer(df, 'customer_id', 'amount')
disc_rev_by_cust

Unnamed: 0,customer_id,amount_discounted
0,1,104.2
1,2,272.25
2,3,257.65


In [76]:
# convert dict to df
import numpy as np
import pandas as pd

# Example dictionary data
data = {
    'user_1': [20, 75, 150],
    'user_2': [50, 60, 110],
    'user_3': [25, 55, 120],
}


In [88]:
# APPROACH 1 - convert to df then melt

# convert to df
df = pd.DataFrame(data).melt(var_name='customer_id', value_name='amount')
# remove 'user' prefix from id
df['customer_id'] = df['customer_id'].str.replace('user_', '')
#df['customer_id'].str[5:]
df

Unnamed: 0,customer_id,amount
0,1,20
1,1,75
2,1,150
3,2,50
4,2,60
5,2,110
6,3,25
7,3,55
8,3,120


In [93]:
# APPROACH 2 - use list comprehension
# Flatten the dictionary into a list of tuples
flattened_data = [(user, txn) for user, txns in data.items() for txn in txns]
df = pd.DataFrame(flattened_data, columns=['customer_id', 'amount'])

In [101]:
df

Unnamed: 0,customer_id,amount
0,user_1,20
1,user_1,75
2,user_1,150
3,user_2,50
4,user_2,60
5,user_2,110
6,user_3,25
7,user_3,55
8,user_3,120


In [111]:
df_agg = df.groupby('customer_id').agg(['sum', 'mean']).reset_index()

In [113]:
df_agg.columns

MultiIndex([('customer_id',     ''),
            (     'amount',  'sum'),
            (     'amount', 'mean')],
           )

In [115]:
# Rank data

# Sample data
data = pd.Series([100, 200, 100, 300, 250])

# Different ranking methods
data.rank(method='average')  # average rank


0    1.5
1    3.0
2    1.5
3    5.0
4    4.0
dtype: float64

In [116]:
data.rank(method='min')      # min rank


0    1.0
1    3.0
2    1.0
3    5.0
4    4.0
dtype: float64

In [117]:
data.rank(method='max')      # max rank


0    2.0
1    3.0
2    2.0
3    5.0
4    4.0
dtype: float64

In [118]:
data.rank(method='first')    # rank by order of appearance


0    1.0
1    3.0
2    2.0
3    5.0
4    4.0
dtype: float64

In [119]:
data.rank(method='dense')    # dense rank

0    1.0
1    2.0
2    1.0
3    4.0
4    3.0
dtype: float64

# Lists

In [3]:
my_list = [1, 2, 3, 4, 5]

In [5]:
# first element
my_list[0]

1

In [12]:
# last element
my_list[-1]

5

In [13]:
# 2nd and 3rd element
my_list[1:3]

[2, 3]

In [14]:
# add to end of list
my_list.append(6)

In [15]:
my_list

[1, 2, 3, 4, 5, 6]

In [17]:
# insert a number (99) at index 2 in the list
my_list.insert(2,99)
my_list

[1, 2, 99, 3, 4, 5, 6]

In [18]:
# remove last element
my_list.pop()
my_list

6

In [20]:
# remove index 2 element
my_list.pop(2)
my_list

[1, 2, 3, 4, 5]

In [31]:
# remove element with value 3
my_list.remove(3)
my_list

[1, 2, 4, 5, 3]

In [66]:
# LIFO
stack = []
stack.append(1)
stack.append(2)
stack.pop()
stack

[1]

In [76]:
# sort
my_list = [5,3,1,4,2]
my_list.sort(reverse = True)

In [79]:
my_list

[5, 4, 3, 2, 1]

In [83]:
# list to df
pd.DataFrame(my_list, columns=['col'])

Unnamed: 0,col
0,5
1,4
2,3
3,2
4,1


In [84]:
# count occurrences of a value
my_list.count(4)

1

In [92]:
# unique values (set)
dup_list = [0,0,1,1,1,2,2,3,3,4]
list(set(dup_list))

[0, 1, 2, 3, 4]

In [96]:
# num vals in list
len(dup_list)

10

In [99]:
# get duplicates in list
from collections import Counter 
counts = Counter(dup_list) # gets counts
[num for num,count in counts.items() if count > 1]

[0, 1, 2, 3]

In [103]:
# get duplicates in list
seen = set()
duplicate = set()

for i in dup_list:
    if i in seen:
        duplicate.add(i)
    else:
        seen.add(i)

list(duplicate)

[0, 1, 2, 3]

In [105]:
# count of occurrences of each value in list
counts = {}
for num in dup_list:
    if num in counts:
        counts[num] += 1
    else:
        counts[num] = 1
counts

{0: 2, 1: 3, 2: 2, 3: 2, 4: 1}

In [115]:
a = [1,3,5,6]
b = 2
a.append(b)
a.sort()
a.index(b)

1

In [29]:
# list comp
[i ** 2 for i in my_list]

In [134]:
# how to add a value to everything in a list: add two lists together element-wise
list1 = [1, 2, 3]
list2 = [4, 5, 6]

# zip pairs the elements by position, so no manual index bookkeeping is
# needed and a shorter second list cannot raise IndexError
list3 = [left + right for left, right in zip(list1, list2)]
list3

[5, 7, 9]

In [136]:
# make two lists a df
list1 = [1, 2, 3]
list2 = ['a', 'b', 'c']

df = pd.DataFrame({
    'Col1': list1,
    'Col2': list2
})
df

Unnamed: 0,Col1,Col2
0,1,a
1,2,b
2,3,c


In [165]:
# find the missing number in the list
lst = [1, 2, 4, 5, 6]

# expected total if no number were missing: 0 + 1 + ... + max(lst),
# computed with the closed form n * (n + 1) / 2
all_num_sum = max(lst) * (max(lst) + 1) // 2
# actual total of the numbers that are present
lst_sum = sum(lst)
# the shortfall between the two totals is the missing number
missing = all_num_sum - lst_sum
missing

3

In [170]:
# rotate list by k steps
def rotate_list(items, k):
    """Return a new list with ``items`` rotated to the right by ``k`` steps.

    ``k`` is reduced modulo the list length, so a ``k`` larger than the
    list wraps around instead of returning the list unrotated. An empty
    list returns an empty list.

    Parameters:
        items: the list to rotate; it is not modified.
        k: number of positions to rotate by.

    Returns:
        A new, rotated list.
    """
    if not items:
        return []
    k %= len(items)
    # the last k elements move to the front; k == 0 yields a plain copy
    return items[-k:] + items[:-k]


lst = [1, 2, 3, 4, 5]
k = 2
rotate_list(lst, k)


[4, 5, 1, 2, 3]

In [171]:
a = 'hello'
a[1]

'e'

In [196]:
# use map to apply a function
def add_two(num):
    """Return ``num`` increased by 2."""
    result = num + 2
    return result

numbers = [1, 2, 3, 4, 5]

list(map(add_two, numbers))

[3, 4, 5, 6, 7]

# Dictionary

In [32]:
my_dict = {"name": "Shane", "age": 30, "job": "Data Scientist"}

In [33]:
my_dict

{'name': 'Shane', 'age': 30, 'job': 'Data Scientist'}

In [36]:
# get name value
my_dict['name']

'Shane'

In [39]:
# add city
my_dict['town'] = 'Tahoe'

In [42]:
# update city
my_dict['town'] = 'SLT'

In [43]:
my_dict

{'name': 'Shane', 'age': 30, 'job': 'Data Scientist', 'town': 'SLT'}

In [44]:
# delete city
del my_dict['town']
my_dict

{'name': 'Shane', 'age': 30, 'job': 'Data Scientist'}

In [48]:
my_dict.items()

dict_items([('name', 'Shane'), ('age', 30), ('job', 'Data Scientist')])

In [60]:
# loop through dict
for key, value in my_dict.items():
    print(key + ': ' + str(value))

name: Shane
age: 30
job: Data Scientist


In [133]:
# get key of max value
nums = [2,2,1,1,1,2,2,3,3,3,3,3,3,3,3,3,3,3,4,4]
# tally how many times each value appears
item_counts = {}
for i in nums:
    # .get supplies 0 the first time a value is seen
    item_counts[i] = item_counts.get(i, 0) + 1
# max iterates over the keys and ranks them by their count
max(item_counts, key=item_counts.get)

3

In [139]:
# dict to df
dict1 = {'col1': 1, 'col2': 2, 'col3': 3}
dict2 = {'col1': 4, 'col2': 5, 'col3': 6}

df = pd.DataFrame([dict1, dict2])
df

Unnamed: 0,col1,col2,col3
0,1,2,3
1,4,5,6


In [176]:
# merge two dictionaries, summing the values of shared keys
dict1 = {'a': 1, 'b': 2}
dict2 = {'b': 3, 'c': 4}

# start from a copy so dict1 itself is left untouched
dict_merged = dict(dict1)
for key, value in dict2.items():
    # shared keys accumulate; keys new to the merge start from 0
    dict_merged[key] = dict_merged.get(key, 0) + value

dict_merged

{'a': 1, 'b': 5, 'c': 4}

In [180]:
# invert dictionary
d = {'a': 1, 'b': 2, 'c': 3}
{value:key for key,value in d.items()}

{1: 'a', 2: 'b', 3: 'c'}

In [181]:
# take a list of words and return a dictionary where the keys are the words and the values are the number of times each word appeared
words = ["apple", "banana", "apple", "orange", "banana", "apple"]

# start with an empty tally
word_count = {}

for word in words:
    # first sighting registers the word at 0 ...
    word_count.setdefault(word, 0)
    # ... and every sighting, first or not, adds 1
    word_count[word] += 1

word_count


{'apple': 3, 'banana': 2, 'orange': 1}

In [194]:
# sort dict
my_dict = {'a': 2, 'c': 1, 'b': 3}
# keys
dict(sorted(my_dict.items()))
# values (descending)
dict(sorted(my_dict.items(), key=lambda item: item[1], reverse=True))

{'b': 3, 'a': 2, 'c': 1}

{'b': 3, 'a': 2, 'c': 1}

In [198]:
# map values to a df from a dict
df = pd.DataFrame({
    'city': ['NY', 'LA', 'SF', 'NY', 'LA']
})

# Dictionary to map cities to full names
city_map = {'NY': 'New York', 'LA': 'Los Angeles', 'SF': 'San Francisco'}

df['city_full'] = df['city'].map(city_map)
df

Unnamed: 0,city,city_full
0,NY,New York
1,LA,Los Angeles
2,SF,San Francisco
3,NY,New York
4,LA,Los Angeles


In [200]:
# map dict to list
# map values in a plain list (no DataFrame) to full names using a dict
city_list =  ['NY', 'LA', 'SF', 'NY', 'LA']

# Dictionary to map cities to full names
city_map = {'NY': 'New York', 'LA': 'Los Angeles', 'SF': 'San Francisco'}

[city_map[i] for i in city_list]

['New York', 'Los Angeles', 'San Francisco', 'New York', 'Los Angeles']

In [204]:
# return unknown if key not in dict
city_map.get('SB', 'unknown')

'unknown'