In [1]:
import os

import numpy as np
import pandas as pd


In [2]:
# Location of the IPDR export. Set the IPDR_CSV_PATH environment variable to
# point at a different copy of the file; when it is unset, the original
# development path is used so existing runs keep working.
IPDR_CSV_PATH = os.environ.get(
    'IPDR_CSV_PATH', '/Users/kaytie/Documents/Git/MiC3/data/ipdr.csv'
)
df = pd.read_csv(IPDR_CSV_PATH)

In [3]:
# Step 1: collect the distinct values of the msisdn column, in order of first appearance
distinct_msisdn = pd.unique(df['msisdn'])
print(f"MSISDNs: {distinct_msisdn}")

MSISDNs: [1 2 3 4]


In [4]:
# Sanity check: list the distinct values of the domain column
distinct_domains = pd.unique(df['domain'])
print(f"Domains: {distinct_domains}")

Domains: ['app1' 'app2' 'app3' 'app4']


In [5]:
# Partition the records by (msisdn, domain) pair.
# NOTE(review): this grouping is created before the starttime/endtime conversion
# and the total_volume_kb derivation in the later cells — confirm that anything
# consuming `grouped` is meant to see those later column changes.
grouped = df.groupby(by=['msisdn', 'domain'])

In [6]:
# Parse the start and end timestamp columns into pandas datetimes.
# NOTE(review): the format string has no separator between the date and the
# time parts — confirm it matches how the timestamps are written in the CSV.
for column in ('starttime', 'endtime'):
    df[column] = pd.to_datetime(df[column], format='%Y-%m-%d%H:%M:%S')

In [7]:
# Step 5: per-record total volume = (uplink + downlink) * 8 / 1024.
# Presumably the volumes are in bytes, making the result kilobits — TODO confirm units.
df['total_volume_kb'] = df['ulvolume'].add(df['dlvolume']).mul(8).div(1024)

In [8]:
# Duration in seconds for each FDR.
# Disabled in this cell; the same assignment is performed in the aggregation cell below.
# df['duration_seconds'] = (df['endtime'] - df['starttime']).dt.total_seconds()

In [9]:
# Accumulator for the per-call summary records produced by the aggregation loop
output = list()

In [10]:
# Build one summary record per (msisdn, domain) pair and collect them in output_df.

# Thresholds used by the steps below, named so they can be tuned in one place.
ET_TRIM = pd.Timedelta(minutes=10)  # Step 4: amount subtracted from the end time
MIN_CALL_KBPS = 10                  # Step 8a: records below this bit rate are discarded
MAX_AUDIO_KBPS = 200                # Step 8b: at or below is audio, above is video
OUTPUT_COLUMNS = ['msisdn', 'domain', 'duration_seconds', 'fdr_count',
                  'kbps', 'isAudio', 'isVideo']

# Duration in seconds of each individual FDR. It does not depend on the group,
# so it is computed once here rather than on every loop iteration (which also
# avoids mutating df while its groups are being iterated).
df['duration_seconds'] = (df['endtime'] - df['starttime']).dt.total_seconds()

# Start from an empty list so that re-running this cell cannot append duplicates.
output = []

# Group here, at the point of use, so the groups are built from df as it is now
# (after the datetime conversion and the total_volume_kb derivation).
for (msisdn, domain), group in df.groupby(['msisdn', 'domain']):

    # Step 3.a: start time (ST) and end time (ET) across the grouped records
    st = group['starttime'].min()
    et = group['endtime'].max()

    # Step 4 / 4a: ET* = ET - 10 minutes, unless that would fall before ST,
    # in which case the original ET is kept.
    et_star = et - ET_TRIM
    if et_star < st:
        et_star = et

    # Step 5: total volume for this pair (sum of the per-record total_volume_kb)
    total_volume = group['total_volume_kb'].sum()

    # Step 6: total time in seconds between ST and ET*
    total_time = (et_star - st).total_seconds()

    # Step 7: bit rate; falls back to 0 when no time elapsed to avoid dividing by zero
    bit_rate = total_volume / total_time if total_time > 0 else 0

    # Step 8a: discard the record if the bit rate is below the minimum
    if bit_rate < MIN_CALL_KBPS:
        continue

    # Step 8b: assume <= 200 kbps is an audio call and > 200 kbps is a video call
    is_audio = bit_rate <= MAX_AUDIO_KBPS
    is_video = bit_rate > MAX_AUDIO_KBPS

    # Number of FDRs that were combined into this single call
    fdr_count = group.shape[0]

    output.append({
        'msisdn': msisdn,
        'domain': domain,
        'duration_seconds': total_time,
        'fdr_count': fdr_count,
        'kbps': bit_rate,
        'isAudio': int(is_audio),
        'isVideo': int(is_video)
    })

# Convert the records to a DataFrame. The explicit column list keeps the schema
# stable even when every record was discarded and `output` is empty.
output_df = pd.DataFrame(output, columns=OUTPUT_COLUMNS)




In [11]:
# Render the per-call summary table (output_df) built in the previous cell.
display(output_df)

Unnamed: 0,msisdn,domain,duration_seconds,fdr_count,kbps,isAudio,isVideo
0,1,app1,2.0,2,185.582031,1,0
1,2,app2,131.0,5,17.04932,1,0
2,3,app3,423.0,6,72.148715,1,0
