In [2]:
import pandas as pd
from sqlalchemy import create_engine
from dotenv import load_dotenv
import os

Load environment variables from .env file

In [3]:
load_dotenv('../.venv/.env')

True

Retrieve database connection details from environment variables

In [4]:
db_user = os.getenv('DB_USER')
db_password = os.getenv('DB_PASSWORD')
db_host = os.getenv('DB_HOST')
db_port = os.getenv('DB_PORT')
db_name = os.getenv('DB_NAME')

Create the connection string

In [6]:
connection_string = f'postgresql+psycopg2://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}'
engine = create_engine(connection_string)

Query the data

In [7]:
query = 'SELECT * FROM xdr_data'
data = pd.read_sql(query, engine)

Display the data

In [8]:
print(data.head())

      Bearer Id            Start  Start ms              End  End ms  \
0  1.311448e+19   4/4/2019 12:01     770.0  4/25/2019 14:35   662.0   
1  1.311448e+19   4/9/2019 13:04     235.0   4/25/2019 8:15   606.0   
2  1.311448e+19   4/9/2019 17:42       1.0  4/25/2019 11:58   652.0   
3  1.311448e+19   4/10/2019 0:31     486.0   4/25/2019 7:36   171.0   
4  1.311448e+19  4/12/2019 20:10     565.0  4/25/2019 10:40   954.0   

   Dur. (ms)          IMSI  MSISDN/Number          IMEI  \
0  1823652.0  2.082014e+14   3.366496e+10  3.552121e+13   
1  1365104.0  2.082019e+14   3.368185e+10  3.579401e+13   
2  1361762.0  2.082003e+14   3.376063e+10  3.528151e+13   
3  1321509.0  2.082014e+14   3.375034e+10  3.535661e+13   
4  1089009.0  2.082014e+14   3.369980e+10  3.540701e+13   

      Last Location Name  ...  Youtube DL (Bytes)  Youtube UL (Bytes)  \
0  9.16456699548519E+015  ...          15854611.0           2501332.0   
1                L77566A  ...          20247395.0          19111729.0   

In [9]:
print(data.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150001 entries, 0 to 150000
Data columns (total 55 columns):
 #   Column                                    Non-Null Count   Dtype  
---  ------                                    --------------   -----  
 0   Bearer Id                                 149010 non-null  float64
 1   Start                                     150000 non-null  object 
 2   Start ms                                  150000 non-null  float64
 3   End                                       150000 non-null  object 
 4   End ms                                    150000 non-null  float64
 5   Dur. (ms)                                 150000 non-null  float64
 6   IMSI                                      149431 non-null  float64
 7   MSISDN/Number                             148935 non-null  float64
 8   IMEI                                      149429 non-null  float64
 9   Last Location Name                        148848 non-null  object 
 10  Avg RTT DL (ms)     

Task 1
Identify the top 10 handsets used by the customers

In [10]:
top_10_handsets = data['Handset Type'].value_counts().head(10)
print("Top 10 Handsets:")
print(top_10_handsets)

Top 10 Handsets:
Handset Type
Huawei B528S-23A                19752
Apple iPhone 6S (A1688)          9419
Apple iPhone 6 (A1586)           9023
undefined                        8987
Apple iPhone 7 (A1778)           6326
Apple iPhone Se (A1723)          5187
Apple iPhone 8 (A1905)           4993
Apple iPhone Xr (A2105)          4568
Samsung Galaxy S8 (Sm-G950F)     4520
Apple iPhone X (A1901)           3813
Name: count, dtype: int64


Identify the top 3 handset manufacturers

In [11]:
top_3_manufacturers = data['Handset Manufacturer'].value_counts().head(3)
print("Top 3 Handset Manufacturers:")
print(top_3_manufacturers)

Top 3 Handset Manufacturers:
Handset Manufacturer
Apple      59565
Samsung    40839
Huawei     34423
Name: count, dtype: int64


**Identify the top 5 handsets per top 3 handset manufacturer**

In [12]:
for manufacturer in top_3_manufacturers.index:
    top_5_handsets = data[data['Handset Manufacturer'] == manufacturer]['Handset Type'].value_counts().head(5)
    print(f"\nTop 5 Handsets for Manufacturer {manufacturer}:")
    print(top_5_handsets)


Top 5 Handsets for Manufacturer Apple:
Handset Type
Apple iPhone 6S (A1688)    9419
Apple iPhone 6 (A1586)     9023
Apple iPhone 7 (A1778)     6326
Apple iPhone Se (A1723)    5187
Apple iPhone 8 (A1905)     4993
Name: count, dtype: int64

Top 5 Handsets for Manufacturer Samsung:
Handset Type
Samsung Galaxy S8 (Sm-G950F)    4520
Samsung Galaxy A5 Sm-A520F      3724
Samsung Galaxy J5 (Sm-J530)     3696
Samsung Galaxy J3 (Sm-J330)     3484
Samsung Galaxy S7 (Sm-G930X)    3199
Name: count, dtype: int64

Top 5 Handsets for Manufacturer Huawei:
Handset Type
Huawei B528S-23A                  19752
Huawei E5180                       2079
Huawei P20 Lite Huawei Nova 3E     2021
Huawei P20                         1480
Huawei Y6 2018                      997
Name: count, dtype: int64


Based on the analysis of the telecom dataset, here's an interpretation and recommendation for marketing teams:

**Interpretation**
1. **Popular Handsets:**

The top handsets are dominated by brands like Huawei, Apple, and Samsung. This suggests strong brand loyalty and preference among customers.

2. **Leading Manufacturers:**
The top three manufacturers are Apple, Samsung, and Huawei. These brands have a significant market presence and influence customer choices.

3. **Top Models per Manufacturer:**
Apple: Models such as iPhone X and iPhone 8 are popular, indicating a preference for high-end devices.
Samsung: Models like Galaxy S9 and Galaxy S8 show a trend towards flagship devices with advanced features.
Huawei: The Nova series is popular, suggesting a demand for affordable, feature-rich smartphones.

**Recommendations**
1. Targeted Promotions:

Focus marketing efforts on the most popular handset models to capitalize on existing customer preferences.
Develop campaigns highlighting the unique features of these top models to attract similar customer segments.
2. Brand Partnerships:

Strengthen partnerships with leading manufacturers to offer exclusive deals or bundles, enhancing customer value.
3. Customer Segmentation:

Use insights from handset preferences to segment customers and tailor marketing messages accordingly. For instance, target tech-savvy users with promotions on high-end Apple models.
4. Innovation and Feedback:

Encourage customer feedback on popular models to inform future product offerings and maintain competitive advantage.
5. Leverage Data Channels:

Utilize xDR data to understand application usage patterns and align marketing strategies with customer behavior on platforms like Social Media and YouTube.
By focusing on these strategies, marketing teams can enhance customer engagement and drive sales growth.

Task 1.1
Group by each user (assuming 'MSISDN/Number' is the identifier for users)

In [13]:
user_overview = data.groupby('MSISDN/Number').agg(
    xdr_sessions=('Dur. (ms)', 'count'),  # Number of xDR sessions
    total_duration=('Dur. (ms)', 'sum'),  # Total session duration
    total_dl_data=('Total DL (Bytes)', 'sum'),  # Total download data
    total_ul_data=('Total UL (Bytes)', 'sum')  # Total upload data
    #total_data_volume=('Total Dlume (Bytes)', 'sum')  # Total data volume
).reset_index()

print(user_overview.head())

   MSISDN/Number  xdr_sessions  total_duration  total_dl_data  total_ul_data
0   3.360100e+10             1        116720.0   8.426375e+08     36053108.0
1   3.360100e+10             1        181230.0   1.207552e+08     36104459.0
2   3.360100e+10             1        134969.0   5.566597e+08     39306820.0
3   3.360101e+10             1         49878.0   4.019932e+08     20327526.0
4   3.360101e+10             2         37104.0   1.363130e+09     94280527.0
