In [1]:
import pandas as pd

In [6]:
# Sample scores: one list per column, five records each
data = dict(
    score_1=[0.1, 0.3, 0.05, 0.2, 0.8],
    score_2=[0.4, 0.2, 0.6, 0.7, 0.3],
)

# Build the frame from the column dictionary and show it as the cell output
df = pd.DataFrame(data=data)
df

Unnamed: 0,score_1,score_2
0,0.1,0.4
1,0.3,0.2
2,0.05,0.6
3,0.2,0.7
4,0.8,0.3


# Task 1. Data Manipulation Basics

### 1. Create a new column in the data frame called highlighted, which is a Boolean value representing whether a record passes the following logic check:
* Both columns are below 0.35, OR
* score_1 is below 0.20 and score_2 is below 0.90, OR
* score_1 is below 0.15 and score_2 is below 0.80



In [7]:
# Express each rule from the task statement as its own boolean mask
score_1, score_2 = df['score_1'], df['score_2']
both_below_035 = (score_1 < 0.35) & (score_2 < 0.35)
below_020_and_090 = (score_1 < 0.20) & (score_2 < 0.90)
below_015_and_080 = (score_1 < 0.15) & (score_2 < 0.80)

# A record is highlighted when it satisfies at least one of the rules
df['highlighted'] = both_below_035 | below_020_and_090 | below_015_and_080

# Display the updated DataFrame
df

Unnamed: 0,score_1,score_2,highlighted
0,0.1,0.4,True
1,0.3,0.2,True
2,0.05,0.6,True
3,0.2,0.7,False
4,0.8,0.3,False


### 2. Create a categorical column called risk_1_group, which is based on score_1 values, as follows:

| score_1 | risk_1_group |
|---|---|
| x < 0.10 | 'Very Low' |
| 0.10 <= x < 0.30 | 'Medium' |
| 0.30 <= x < 0.80 | 'High' |
| x >= 0.80 | 'Very High' |

In [8]:
# Bin edges for score_1; the outer edges are infinite so every value lands in a bin
bins = [float("-inf"), 0.10, 0.30, 0.80, float("inf")]
# One label per interval, in the same order as the edges above
labels = ['Very Low', 'Medium', 'High', 'Very High']

# right=False makes each interval closed on the left and open on the right,
# i.e. [lower, upper), which matches the "lower <= x < upper" rules in the task
df['risk_1_group'] = pd.cut(
    x=df['score_1'],
    bins=bins,
    right=False,
    labels=labels,
)

# Display the updated DataFrame
df

Unnamed: 0,score_1,score_2,highlighted,risk_1_group
0,0.1,0.4,True,Medium
1,0.3,0.2,True,High
2,0.05,0.6,True,Very Low
3,0.2,0.7,False,Medium
4,0.8,0.3,False,Very High


# Task 2 - Python Class Basics

Suppose we want to create a Reimbursement class that describes spending for Ads that a vendor can run and get reimbursed for, per the following specs:

| Ad_Type | Cost_Share_Rate (per dollar) | Allowed_Spend_per_Ad |
|---|---|---|
| 0011 | 0.50 | \$200 |
| 1011 | 1.00 | \$1000 to \$2000 |
| 1111 | 0.75 | \$500 |
| 1010 | 0.90 | Up to \$750 |

The class should have:
1. A function to initialize the Ads object.
2. A data structure to track how many Ads there are of each type.
3. A function to add and remove Ads of the specific type.
4. A function to print the content of the Ads object.
5. A function that returns the total amount of reimbursement.

Please show your code that defines the class, then unit test the class to ensure it behaves as you designed.

In [10]:
class Reimbursement:
    """Track how many ads of each type a vendor runs and total the reimbursement.

    Each ad type carries a cost-share rate and an allowed spend per ad. The
    allowed spend may be given as:

    * a number (e.g. ``200``),
    * a ``(min_spend, max_spend)`` tuple (e.g. ``(1000, 2000)``), or
    * a string such as ``'Up to 750'`` or ``'$750'``, read as a numeric cap.

    Attributes:
        ads: dict keyed by ad type. Each value is a dict with the keys
            ``'cost_share_rate'``, ``'allowed_spend_per_ad'`` (stored exactly
            as supplied) and ``'count'`` (number of ads currently tracked).
    """

    def __init__(self, ads_data):
        """Register every ad type found in ``ads_data`` with a count of zero.

        Args:
            ads_data: iterable of dicts, each providing the keys ``'ad_type'``,
                ``'cost_share_rate'`` and ``'allowed_spend_per_ad'``.
        """
        self.ads = {}
        for ad in ads_data:
            self.ads[ad['ad_type']] = {
                'cost_share_rate': ad['cost_share_rate'],
                'allowed_spend_per_ad': ad['allowed_spend_per_ad'],
                'count': 0
            }

    @staticmethod
    def _spend_limit(allowed_spend):
        """Return a numeric spend for a non-tuple allowed-spend spec.

        Numbers are returned unchanged. Strings have an optional leading
        "Up to" (case-insensitive), ``$`` signs and thousands separators
        removed before being converted to ``float``.

        Raises:
            ValueError: if a string spec does not contain a parsable number.
        """
        if not isinstance(allowed_spend, str):
            return allowed_spend

        text = allowed_spend.strip()
        if text.lower().startswith('up to'):
            text = text[len('up to'):]
        text = text.replace('$', '').replace(',', '').strip()
        try:
            return float(text)
        except ValueError:
            raise ValueError(
                f"Unrecognized allowed_spend_per_ad value: {allowed_spend!r}"
            ) from None

    def add_ad(self, ad_type):
        """Increase the count for ``ad_type`` by one.

        Unknown ad types are not added; a message is printed instead.
        """
        if ad_type in self.ads:
            self.ads[ad_type]['count'] += 1
        else:
            print(f"Invalid ad type: {ad_type}")

    def remove_ad(self, ad_type):
        """Decrease the count for ``ad_type`` by one.

        If the type is unknown or its count is already zero, nothing changes
        and a message is printed, so the count never goes negative.
        """
        if ad_type in self.ads and self.ads[ad_type]['count'] > 0:
            self.ads[ad_type]['count'] -= 1
        else:
            print(f"No ad of type {ad_type} to remove.")

    def print_ads(self):
        """Print the current count of every registered ad type."""
        print("Ads:")
        for ad_type, ad_info in self.ads.items():
            print(f"{ad_type}: {ad_info['count']} ads")

    def total_reimbursement(self):
        """Return the total reimbursement across all tracked ads.

        For a numeric or string ("Up to N") spec, every ad is assumed to spend
        the full stated amount: ``count * spend * cost_share_rate``.
        For a ``(min_spend, max_spend)`` tuple the reimbursable spend is
        ``min(count * min_spend, max_spend)`` times the cost-share rate.

        Ad types with a count of zero are skipped, so their spec is never
        parsed. Returns ``0`` when no ads are tracked.

        Raises:
            ValueError: if a counted ad type has a string spec that cannot be
                parsed into a number.
        """
        total = 0
        for ad_info in self.ads.values():
            count = ad_info['count']
            if count <= 0:
                continue

            cost_share_rate = ad_info['cost_share_rate']
            allowed_spend = ad_info['allowed_spend_per_ad']
            if isinstance(allowed_spend, tuple):
                min_spend, max_spend = allowed_spend
                # NOTE(review): max_spend caps the combined spend of ALL ads of
                # this type, although the spec describes a per-ad range.
                # Behavior kept as-is; confirm the intended rule.
                total += min(count * min_spend, max_spend) * cost_share_rate
            else:
                # Strings such as 'Up to 750' used to reach this multiplication
                # unparsed and raise TypeError (str * float); convert first.
                total += count * self._spend_limit(allowed_spend) * cost_share_rate
        return total

# Unit Test
# The prints below show the state for the reader; the assert statements are the
# actual checks, so a regression fails the cell instead of passing unnoticed.
ads_data = [
    {'ad_type': '0011', 'cost_share_rate': 0.50, 'allowed_spend_per_ad': 200},
    {'ad_type': '1011', 'cost_share_rate': 1.00, 'allowed_spend_per_ad': (1000, 2000)},
    {'ad_type': '1111', 'cost_share_rate': 0.75, 'allowed_spend_per_ad': 500},
    {'ad_type': '1010', 'cost_share_rate': 0.90, 'allowed_spend_per_ad': 'Up to 750'}
]

reimbursement = Reimbursement(ads_data)

# A fresh object tracks every ad type with zero ads and nothing to reimburse
assert set(reimbursement.ads) == {'0011', '1011', '1111', '1010'}
assert all(info['count'] == 0 for info in reimbursement.ads.values())
assert reimbursement.total_reimbursement() == 0

# Add ads
reimbursement.add_ad('0011')
reimbursement.add_ad('1011')
reimbursement.add_ad('1011')
reimbursement.add_ad('1111')

counts_before = {ad_type: info['count'] for ad_type, info in reimbursement.ads.items()}
assert counts_before == {'0011': 1, '1011': 2, '1111': 1, '1010': 0}

# Print ads and total reimbursement before removal
print("Ads and Total Reimbursement Before Removal:")
reimbursement.print_ads()
total_before = reimbursement.total_reimbursement()
print("Total reimbursement:", total_before)
# 0011: 1*200*0.50 = 100; 1011: min(2*1000, 2000)*1.00 = 2000; 1111: 1*500*0.75 = 375
assert abs(total_before - 2475.0) < 1e-9

# Remove an ad
reimbursement.remove_ad('0011')
assert reimbursement.ads['0011']['count'] == 0

# Print ads and total reimbursement after removal
print("\nAds and Total Reimbursement After Removal:")
reimbursement.print_ads()
total_after = reimbursement.total_reimbursement()
print("Total reimbursement:", total_after)
# Removing the single 0011 ad drops its 100 contribution
assert abs(total_after - 2375.0) < 1e-9

Ads and Total Reimbursement Before Removal:
Ads:
0011: 1 ads
1011: 2 ads
1111: 1 ads
1010: 0 ads
Total reimbursement: 2475.0

Ads and Total Reimbursement After Removal:
Ads:
0011: 0 ads
1011: 2 ads
1111: 1 ads
1010: 0 ads
Total reimbursement: 2375.0


# Task 3 - Data Analysis

In [21]:
# Load the Task 3 data; this rebinds `df`, replacing the frame used in Task 1
df = pd.read_csv('task3_dateset.csv')

# Number each site's rows 1, 2, 3, ... in the order they appear in the file.
# NOTE(review): the previews show each site's rows listed newest date first, so
# this numbering grows backwards in time - confirm that is the intended correction.
row_position_in_site = df.groupby('Site').cumcount()
df['Corrected_Ads_Run'] = row_position_in_site + 1

In [23]:
# Preview the first ten rows of the frame
df.head(n=10)

Unnamed: 0,Site,Date,Ads_Run,Corrected_Ads_Run
0,A,9/24/20,0,1
1,A,9/23/20,0,2
2,A,9/22/20,0,3
3,A,9/21/20,0,4
4,A,9/20/20,0,5
5,A,9/19/20,0,6
6,A,9/18/20,6,7
7,A,9/17/20,5,8
8,A,9/16/20,0,9
9,A,9/15/20,0,10


In [22]:
# Preview the last ten rows of the frame
df.tail(n=10)

Unnamed: 0,Site,Date,Ads_Run,Corrected_Ads_Run
197,D,8/10/20,44,46
198,D,8/9/20,43,47
199,D,8/8/20,42,48
200,D,8/7/20,55,49
201,D,8/6/20,54,50
202,D,8/5/20,53,51
203,D,8/4/20,52,52
204,D,8/3/20,51,53
205,D,8/2/20,50,54
206,D,8/1/20,49,55
