In [None]:
import pandas as pd
import numpy as np

from datetime import datetime, timedelta
from datetime import date

import seaborn as sns 
import matplotlib.pyplot as plt
import matplotlib
import sys
# Make the project's `src` directory importable so the `customer_personas`
# import below resolves. Guarded so that re-running this cell does not keep
# appending duplicate entries to sys.path.
_PROJECT_SRC_DIR = '/home/jupyter/aaaie-customer-personas/src'
if _PROJECT_SRC_DIR not in sys.path:
    sys.path.append(_PROJECT_SRC_DIR)

from customer_personas.bq_utils import export_dataframe_to_bq, upload_data_to_gcs
# Use a wide (22, 8) default figure size for every plot in this notebook.
matplotlib.rcParams.update({'figure.figsize': (22, 8)})

## Get WLN Calls to Country
Pakistan

In [None]:
%%bigquery wln_calls_to_country_df

-- Wireline voice usage detail rows whose terminating country name is
-- Pakistan (compared case-insensitively via UPPER), limited to the last
-- three months. The result is loaded into `wln_calls_to_country_df`.
SELECT
    wln_vc_usg_dtl_dt,
    cust_id,
    billg_acct_id,
    billg_srvc_tel_num,
    orig_srvc_tel_num,
    orig_place_nm,
    orig_cntry_nm,
    term_cntry_nm,
    event_durtn_sec_qty
FROM `cio-datahub-enterprise-pr-183a.ent_usage_rated.bq_wln_voice_usg_dtl` 
-- Keep only rows dated strictly after (today - 3 months).
WHERE DATE(wln_vc_usg_dtl_dt) > DATE_SUB(CURRENT_DATE(), INTERVAL 3 MONTH)
AND UPPER(term_cntry_nm) = 'PAKISTAN' 
    

In [None]:
# Preview the first five rows of the wireline query result as a sanity check.
wln_calls_to_country_df.head(n=5)

In [None]:
# Write the wireline call records to a local CSV (row index omitted); the
# same file name is used for the upload that follows.
wln_calls_to_country_df.to_csv(
    path_or_buf='pakistan_wln_calls.csv',
    index=False,
)

In [None]:
# Destination project and bucket for the exported wireline CSV.
bi_project_id = 'cto-wln-sa-data-pr-bb5283'
bi_bkt_name = 'temp_workspace'

# Hand the local CSV to the project helper, using the same name for the
# object path and the local file.
# NOTE(review): upload_data_to_gcs is defined outside this notebook; its
# overwrite/error behavior is not visible here.
upload_data_to_gcs(
    project_id=bi_project_id,
    bucket_name=bi_bkt_name,
    gcs_path='pakistan_wln_calls.csv',
    local_path='pakistan_wln_calls.csv',
)

### Plot Number of Calls over Time

In [None]:
# Daily wireline call counts: one row per usage date, with the number of
# non-null cust_id values on that date reported as `calls`.
wln_calls_to_country_by_day_df = (
    wln_calls_to_country_df
    .groupby('wln_vc_usg_dtl_dt')[['cust_id']]
    .count()
    .reset_index()
    .rename(columns={'wln_vc_usg_dtl_dt': 'date', 'cust_id': 'calls'})
)
                                                                                                        

In [None]:
# Tag every wireline call with its calendar month (monthly Period) so the
# following cells can summarise by month.
wln_call_months = wln_calls_to_country_df['wln_vc_usg_dtl_dt'].dt.to_period('M')
wln_calls_to_country_df['month'] = wln_call_months

Number of unique wireline customers calling Pakistan, by month

In [None]:
# Distinct cust_id values per month, reported as `wln_customers`.
(
    wln_calls_to_country_df
    .groupby('month')[['cust_id']]
    .nunique()
    .reset_index()
    .rename(columns={'cust_id': 'wln_customers'})
)

Number of wireline calls to Pakistan, by month

In [None]:
# Rows with a non-null cust_id per month, reported as `wln_calls`.
(
    wln_calls_to_country_df
    .groupby('month')[['cust_id']]
    .count()
    .reset_index()
    .rename(columns={'cust_id': 'wln_calls'})
)

In [None]:
# First date of the daily series: the text before the first space in the
# string form of the first row's first column.
str(wln_calls_to_country_by_day_df.iloc[0, 0]).partition(' ')[0]

In [None]:
# Date of the second-to-last row of the daily series (the plot's end date):
# the text before the first space in its string form.
str(wln_calls_to_country_by_day_df.iloc[-2, 0]).partition(' ')[0]

In [None]:
# Line chart of daily wireline calls, drawn in the left panel of a 1x2 grid.
# The final row is left out of the plot, so the title's end date comes from
# the second-to-last row.
# NOTE(review): presumably the last day is dropped because it is incomplete;
# confirm.
plt.clf()
plt.subplot(1, 2, 1)
wln_daily_plotted = wln_calls_to_country_by_day_df.iloc[:-1, :]
sns.lineplot(x='date', y='calls', data=wln_daily_plotted)
plt.xlabel('Date')
plt.ylabel('Number of Calls')
plt.xticks(rotation=45)
wln_first_day = str(wln_calls_to_country_by_day_df.iloc[0, 0]).split(' ')[0]
wln_last_day = str(wln_calls_to_country_by_day_df.iloc[-2, 0]).split(' ')[0]
plt.title(
    'Total Number of Wireline calls to Pakistan From {date_1} to {date_2}'.format(
        date_1=wln_first_day,
        date_2=wln_last_day,
    )
)

## Get Unbilled WLS Calls to Country
Pakistan

In [None]:
%%bigquery wls_calls_to_country_df

-- Wireless call usage events from the last three months whose
-- call_to_city_desc contains "PAKISTAN" (case-insensitive via UPPER).
-- chnl_szr_ts is exposed as `date`; the result is loaded into
-- `wls_calls_to_country_df`.
SELECT
    chnl_szr_ts AS date,
    ban,
    subscr_num,
    call_to_city_desc,
    airtime_dur_sec,
    hm_pmn_id,
    srvc_pmn_id,
    
FROM `cio-datahub-enterprise-pr-183a.ent_usage_rated.bq_wls_ptpd_unbilld_dly_call_usg_evnt_dtl`
-- Keep only events dated strictly after (today - 3 months).
WHERE DATE(chnl_szr_ts) >  DATE_SUB(CURRENT_DATE(), INTERVAL 3 MONTH)
AND UPPER(call_to_city_desc) LIKE "%PAKISTAN%"


In [None]:
# Distinct hm_pmn_id values present in the wireless result.
wls_calls_to_country_df['hm_pmn_id'].unique()

In [None]:
# Distinct srvc_pmn_id values present in the wireless result.
wls_calls_to_country_df['srvc_pmn_id'].unique()

In [None]:
# Preview the first five rows of the wireless query result.
wls_calls_to_country_df.head(n=5)

In [None]:
# Write the wireless call records to a local CSV (row index omitted); the
# same file name is used for the upload that follows.
wls_calls_to_country_df.to_csv(
    path_or_buf='pakistan_wls_calls.csv',
    index=False,
)

In [None]:
# Destination project and bucket for the exported wireless CSV.
bi_project_id = 'cto-wln-sa-data-pr-bb5283'
bi_bkt_name = 'temp_workspace'

# Hand the local CSV to the project helper, using the same name for the
# object path and the local file.
# NOTE(review): upload_data_to_gcs is defined outside this notebook; its
# overwrite/error behavior is not visible here.
upload_data_to_gcs(
    project_id=bi_project_id,
    bucket_name=bi_bkt_name,
    gcs_path='pakistan_wls_calls.csv',
    local_path='pakistan_wls_calls.csv',
)

In [None]:
# Preview the first five rows of the wireless result again before deriving
# the month column.
wls_calls_to_country_df.head(n=5)

In [None]:
# Tag every wireless call with its calendar month (monthly Period) so the
# following cells can summarise by month.
wls_call_months = wls_calls_to_country_df['date'].dt.to_period('M')
wls_calls_to_country_df['month'] = wls_call_months

In [None]:
# Distinct subscr_num values per month, reported as `wls_customers`.
(
    wls_calls_to_country_df
    .groupby('month')[['subscr_num']]
    .nunique()
    .reset_index()
    .rename(columns={'subscr_num': 'wls_customers'})
)

In [None]:
# Rows with a non-null subscr_num per month, reported as `wls_calls`.
(
    wls_calls_to_country_df
    .groupby('month')[['subscr_num']]
    .count()
    .reset_index()
    .rename(columns={'subscr_num': 'wls_calls'})
)

In [None]:
# Derive the calendar date of each call from its timestamp. Uses the built-in
# `.dt.date` accessor instead of a row-by-row `apply` with a lambda; the
# result is still a column of `datetime.date` objects.
wls_calls_to_country_df['call_date'] = wls_calls_to_country_df['date'].dt.date

In [None]:
# Daily wireless call counts: one row per call_date (renamed to `date`), with
# the number of non-null subscr_num values on that date reported as `calls`.
wls_calls_to_country_by_day_df = (
    wls_calls_to_country_df
    .groupby('call_date')[['subscr_num']]
    .count()
    .reset_index()
    .rename(columns={'call_date': 'date', 'subscr_num': 'calls'})
)
                                                                                                        

In [None]:
# Display the per-day wireless call counts (columns `date` and `calls`).
wls_calls_to_country_by_day_df

In [None]:
# Line chart of daily wireless calls, drawn in the left panel of a 1x2 grid.
# The final row is left out of the plot, so the title's end date comes from
# the second-to-last row.
# NOTE(review): presumably the last day is dropped because it is incomplete;
# confirm.
plt.clf()
plt.subplot(1, 2, 1)
wls_daily_plotted = wls_calls_to_country_by_day_df.iloc[:-1, :]
sns.lineplot(x='date', y='calls', data=wls_daily_plotted)
plt.xlabel('Date')
plt.ylabel('Number of Calls')
plt.xticks(rotation=45)
wls_first_day = str(wls_calls_to_country_by_day_df.iloc[0, 0]).split(' ')[0]
wls_last_day = str(wls_calls_to_country_by_day_df.iloc[-2, 0]).split(' ')[0]
plt.title(
    'Total Number of Wireless calls to Pakistan From {date_1} to {date_2}'.format(
        date_1=wls_first_day,
        date_2=wls_last_day,
    )
)

## Customers roaming in Pakistan (usage in the last 90 days)

In [None]:
%%bigquery roamers_df

-- Builds one row per (subscriber phone number, usage date, usage type) for
-- SMS, data and voice usage over the last 90 days, then stacks the three
-- sets with UNION DISTINCT. Every CTE selects seven columns in the same
-- order, and the union lines them up by position.
-- NOTE(review): the second column, CURRENT_DATE(), has no alias in any CTE,
-- so its name in the resulting DataFrame is whatever BigQuery generates.

-- sms_df: SMS usage rows where itu_mbl_cntry_cd = '410', grouped by the five
-- non-aggregated columns.
with sms_df as 
 (
     SELECT 
      usg_subscr_ph_num as usage_subscriber_phone_num,
      CURRENT_DATE(),
      date(sms_data_dtl_start_ts) as usage_date,
      'sms' AS usage_type,
      itu_mbl_cntry_cd,
      count(billg_acct_num) as total_counts,
      sum(actl_units_qty) as usage_qty

 FROM
   `cio-datahub-enterprise-pr-183a.ent_usage_rated.bq_wls_sms_data_dtl` A

 WHERE
     DATE(sms_data_dtl_start_ts) > date_sub(CURRENT_DATE(),INTERVAL 90 DAY) 
     and itu_mbl_cntry_cd = '410' -- Country Code for Pakistan 410
  group by 1,2,3,4,5

   )
-- intrnet_usage: data usage rows with the same '410' filter and the same
-- grouping as sms_df.
, intrnet_usage as 
(
  SELECT 
    
    usg_subscr_ph_num as usage_subscriber_phone_num,
    CURRENT_DATE(),
    date(inet_data_dtl_start_ts) as usage_date,
    'data' AS usage_type,
    itu_mbl_cntry_cd,
    count(billg_acct_num) as total_counts,
    -- IF (itu_mbl_cntry_cd = '302', False, True) as roaming_usage_flag,
    sum(actl_units_qty) as usage_qty


 FROM
    `cio-datahub-enterprise-pr-183a.ent_usage_rated.bq_wls_inet_srvc_prov_data_dtl` A
WHERE
   (DATE(A.inet_data_dtl_start_ts) > date_sub(CURRENT_DATE(),INTERVAL 90 DAY))
   and A.itu_mbl_cntry_cd = '410'
  
  group by 1,2,3,4,5
  -- having actl_units_qty > 0
  -- NOTE(review): this ORDER BY sits inside a CTE; the final SELECT has no
  -- ORDER BY, so do not rely on the row order of the result.
  order by 1,2,3,4
 
  
),
-- voice_df: call events whose srvc_pmn_id contains 'PAK'. Both total_counts
-- and usage_qty are count(*) here, i.e. the number of call rows.
-- NOTE(review): the fifth column is srvc_pmn_id, which the union places under
-- itu_mbl_cntry_cd; voice rows therefore hold a different kind of value in
-- that column than sms/data rows.
voice_df as  (
    SELECT 
      
      subscr_num as usage_subscriber_phone_num,
      CURRENT_DATE(),
      date(chnl_szr_ts) as usage_date,
      'voice' AS usage_type,
      srvc_pmn_id,
      count(*) as total_counts,
      count(*) as usage_qty
    FROM
      `cio-datahub-enterprise-pr-183a.ent_usage_rated.bq_wls_ptpd_billd_dly_call_usg_evnt_dtl` A

    WHERE
    
        DATE(chnl_szr_ts) > date_sub(CURRENT_DATE(),INTERVAL 90 DAY) 
        and srvc_pmn_id like '%PAK%'
        
      group by 1,2,3,4,5
      
)

-- combined_df: the three usage sets stacked; output column names come from
-- the first branch (sms_df).
, combined_df as 
 (

  select * from sms_df
  union distinct
  select * from intrnet_usage
  Union distinct
  Select * from voice_df
)
    
SELECT * FROM combined_df    

In [None]:
# Number of distinct subscriber phone numbers across all usage types.
roamers_df['usage_subscriber_phone_num'].nunique()

In [None]:
# Parse usage_date to datetimes, then tag each row with its calendar month
# (monthly Period) for the monthly summary.
roamer_usage_dates = pd.to_datetime(roamers_df['usage_date'])
roamers_df['month'] = roamer_usage_dates.dt.to_period('M')

In [None]:
# Distinct subscriber phone numbers per month (month stays as the index).
(
    roamers_df
    .groupby('month')[['usage_subscriber_phone_num']]
    .nunique()
)