In [1]:
import pandas as pd

## Load

In [2]:
transcript = pd.read_csv("../../data/processed/transcript.csv")
transcript.head()

Unnamed: 0,profile_id,event,time,portfolio_id,amount,transcript_reward
0,78afa995795e4d85b5d9ceeca43f5fef,offer_received,0,9b98b8c7a33c4b65b9aebfe6a799e6d9,,
1,a03223e636434f42ac4c3df47e8bac43,offer_received,0,0b1e1539f2cc45b7b9fa7c272da2e1d7,,
2,e2127556f4f64592b11af22de27a7932,offer_received,0,2906b810c7d4411798c6938adc9daaa5,,
3,8ec6ce2a7e7949b1bf142def7d0e0586,offer_received,0,fafdcd668e3743c1bb461111dcafc2a4,,
4,68617ca6246f4fbc85e91a2a49552598,offer_received,0,4d5c57ea9a6940dd891ad53e9dbe8da0,,


## Add features

In [3]:
# Filter the dataframe for offer_received and offer_viewed events
offer_received = transcript[transcript['event'] == 'offer_received']
offer_viewed = transcript[transcript['event'] == 'offer_viewed']

# Count the number of offer_received and offer_viewed per profile_id per portfolio_id
received_count = offer_received.groupby(['profile_id', 'portfolio_id']).size().reset_index(name='received_count')
viewed_count = offer_viewed.groupby(['profile_id', 'portfolio_id']).size().reset_index(name='viewed_count')

# Merge the counts into a single dataframe
merged_counts = pd.merge(received_count, viewed_count, on=['profile_id', 'portfolio_id'], how='left')

# Calculate the score
merged_counts['score'] = merged_counts['viewed_count'] / merged_counts['received_count']

# Fill NaN values with 0 (in case there are profile_id and portfolio_id combinations with no viewed_count)
merged_counts['score'] = merged_counts['score'].fillna(0)

merged_counts.head()

Unnamed: 0,profile_id,portfolio_id,received_count,viewed_count,score
0,0009655768c64bdeb2e877511632db8f,2906b810c7d4411798c6938adc9daaa5,1,,0.0
1,0009655768c64bdeb2e877511632db8f,3f207df678b143eea3cee63160fa8bed,1,1.0,1.0
2,0009655768c64bdeb2e877511632db8f,5a8bc65990b245e5a138643cd4eb9837,1,1.0,1.0
3,0009655768c64bdeb2e877511632db8f,f19421c1d4aa40978ebb69ca19b0e20d,1,1.0,1.0
4,0009655768c64bdeb2e877511632db8f,fafdcd668e3743c1bb461111dcafc2a4,1,1.0,1.0


## Save

In [4]:
merged_counts.to_csv("../../data/features/transcript.csv", index=False)