## Data Cleaning Sample and Working with a Local Database Connection

In [1]:
# Import modules
import sys
import os
import pandas as pd

In [2]:
# Make the parent directory (the project root) importable so that the
# `scripts` package can be imported from this notebook.
# Guarded so that re-running the cell does not pile up duplicate sys.path entries.
PROJECT_ROOT = os.path.abspath("..")
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [3]:
# Now import the modules
from scripts.data_cleaning import load_csv, clean_dataframe, save_cleaned_data
from scripts.database_setup import get_db_connection, create_table, insert_data

### Load and Inspect Raw Data

In [4]:
# Read the raw DoctorsET CSV through the project loader
raw_csv_path = "../data/DoctorsET_data.csv"
df = load_csv(raw_csv_path)

# Preview the first ten rows to sanity-check the columns
df.head(n=10)


2025-01-30 11:35:06,915 - INFO - ✅ CSV file '../data/DoctorsET_data.csv' loaded successfully.


Unnamed: 0,Channel Title,Channel Username,ID,Message,Date,Media Path
0,Doctors Ethiopia,@DoctorsET,864,https://youtu.be/5DBoEm-8kmA?si=LDLuEecNfULJVD...,2023-12-18 17:04:02+00:00,photos/@DoctorsET_864.jpg
1,Doctors Ethiopia,@DoctorsET,863,ዶክተርስ ኢትዮጵያ በ አዲስ አቀራረብ በ ቴሌቪዥን ፕሮግራሙን ለመጀመር ከ...,2023-11-03 16:14:39+00:00,photos/@DoctorsET_863.jpg
2,Doctors Ethiopia,@DoctorsET,862,ሞት በስኳር \n\nለልጆቻችን የምናሲዘው ምሳቃ ሳናቀው እድሚያቸውን ይቀን...,2023-10-02 16:37:39+00:00,photos/@DoctorsET_862.jpg
3,Doctors Ethiopia,@DoctorsET,861,ከ HIV የተፈወሰ ሰው አጋጥሟችሁ ያቃል ? ፈውስ እና ህክምና ?\n\nሙ...,2023-09-16 07:54:32+00:00,photos/@DoctorsET_861.jpg
4,Doctors Ethiopia,@DoctorsET,860,በቅርብ ጊዜ በሃገራችን ላይ እየተስተዋለ ያለ የተመሳሳይ ፆታ ( Homos...,2023-09-01 16:16:15+00:00,photos/@DoctorsET_860.jpg
5,Doctors Ethiopia,@DoctorsET,859,👇👇👇👇👇👇 https://youtu.be/-AR1KO2DbFw?si=47cXLZt...,2023-08-29 17:20:05+00:00,photos/@DoctorsET_859.jpg
6,Doctors Ethiopia,@DoctorsET,848,ክረምቱን ስፖርት መስራት አስበው ጂም ለመግባት ካልቻሉ ባሉበት ቦታ ሆነው...,2022-08-02 17:42:08+00:00,photos/@DoctorsET_848.jpg
7,Doctors Ethiopia,@DoctorsET,847,ስፖርት የመስራት ሱስ ይኖር ይሆን?\n\nበአሁኑ ወቅት ብዙ የስፖርት መስ...,2022-06-12 17:15:47+00:00,photos/@DoctorsET_847.jpg
8,Doctors Ethiopia,@DoctorsET,846,ድንገተኛ አደጋ / የአጥንት ስብራት\n\nአያርገውና ድንገተኛ የሆነ አደጋ...,2022-05-31 17:51:13+00:00,photos/@DoctorsET_846.jpg
9,Doctors Ethiopia,@DoctorsET,845,ከትንሽ ግዚያት በፊት ስፖርት መስራት እንደ ቅንጦት ይታይ ነበር አሁን ላ...,2022-05-20 18:04:53+00:00,photos/@DoctorsET_845.jpg


### Clean and Standardize Data

In [6]:
# Run the project's cleaning routine on the raw frame
df_cleaned = clean_dataframe(df)

# Preview the first ten cleaned rows
df_cleaned.head(n=10)


2025-01-30 11:35:29,218 - INFO - ✅ Duplicates removed from dataset.
2025-01-30 11:35:30,287 - INFO - ✅ Date column formatted to datetime.
2025-01-30 11:35:30,303 - INFO - ✅ Missing values filled.
2025-01-30 11:35:30,329 - INFO - ✅ Text columns standardized.
2025-01-30 11:35:30,336 - INFO - ✅ Emojis extracted and stored in 'emoji_used' column.
2025-01-30 11:35:30,345 - INFO - ✅ YouTube links extracted and stored in 'youtube_links' column.
2025-01-30 11:35:30,492 - INFO - ✅ Data cleaning completed successfully.


Unnamed: 0,channel_title,channel_username,message_id,message,message_date,media_path,emoji_used,youtube_links
0,Doctors Ethiopia,@DoctorsET,864,"በቀን አንዴ ብቻ የሚባለው የቢዝነስ አማካሪ በ 10,000 ብር ብቻ የተ...",2023-12-18 17:04:02+00:00,photos/@DoctorsET_864.jpg,👈👈👇👇,https://youtu.be/5DBoEm-8kmA?si=LDLuEecNfULJVD...
1,Doctors Ethiopia,@DoctorsET,863,ዶክተርስ ኢትዮጵያ በ አዲስ አቀራረብ በ ቴሌቪዥን ፕሮግራሙን ለመጀመር ከ...,2023-11-03 16:14:39+00:00,photos/@DoctorsET_863.jpg,👇,https://youtu.be/gwVN5eJQpko?si=xARsSxIEdZtE91GY
2,Doctors Ethiopia,@DoctorsET,862,ሞት በስኳር ለልጆቻችን የምናሲዘው ምሳቃ ሳናቀው እድሚያቸውን ይቀንሰው ...,2023-10-02 16:37:39+00:00,photos/@DoctorsET_862.jpg,No emoji,https://youtu.be/oHiSRrNF7I0?si=Absgm414YSt_kjNq
3,Doctors Ethiopia,@DoctorsET,861,ከ HIV የተፈወሰ ሰው አጋጥሟችሁ ያቃል ? ፈውስ እና ህክምና ? ሙሉ ቪ...,2023-09-16 07:54:32+00:00,photos/@DoctorsET_861.jpg,👇👇👇👇,https://youtu.be/tTeErZxIh_Q?si=jKHyfWcC3sfXbC8L
4,Doctors Ethiopia,@DoctorsET,860,በቅርብ ጊዜ በሃገራችን ላይ እየተስተዋለ ያለ የተመሳሳይ ፆታ ( Homos...,2023-09-01 16:16:15+00:00,photos/@DoctorsET_860.jpg,No emoji,https://youtu.be/0k65P5ouw7s?si=qaUgo75bUa3AMQxD
5,Doctors Ethiopia,@DoctorsET,859,ዶክተርስ ኢትዮጽያ በአዲስ ፕሮገራም ጀመረ ማረጥ (ሜኖፖዝ ) ጋር ተያይ...,2023-08-29 17:20:05+00:00,photos/@DoctorsET_859.jpg,👇👇👇👇👇👇,https://youtu.be/-AR1KO2DbFw?si=47cXLZtlmhx1Nl...
6,Doctors Ethiopia,@DoctorsET,848,ክረምቱን ስፖርት መስራት አስበው ጂም ለመግባት ካልቻሉ ባሉበት ቦታ ሆነው...,2022-08-02 17:42:08+00:00,photos/@DoctorsET_848.jpg,👇👇👇👇👇,https://youtu.be/0uiTzjEbh90
7,Doctors Ethiopia,@DoctorsET,847,ስፖርት የመስራት ሱስ ይኖር ይሆን? በአሁኑ ወቅት ብዙ የስፖርት መስሪያ ...,2022-06-12 17:15:47+00:00,photos/@DoctorsET_847.jpg,👇👇👇👇👇👇,https://youtu.be/WPlRuRtQXN8
8,Doctors Ethiopia,@DoctorsET,846,ድንገተኛ አደጋ / የአጥንት ስብራት አያርገውና ድንገተኛ የሆነ አደጋ ቢደ...,2022-05-31 17:51:13+00:00,photos/@DoctorsET_846.jpg,👇👇👇👇👇👇👇,https://youtu.be/QI-8oqW80uI
9,Doctors Ethiopia,@DoctorsET,845,ከትንሽ ግዚያት በፊት ስፖርት መስራት እንደ ቅንጦት ይታይ ነበር አሁን ላ...,2022-05-20 18:04:53+00:00,photos/@DoctorsET_845.jpg,👇👇👇👇👇👇,https://youtu.be/_IEWt07bECg


In [7]:
# Count nulls per column of the cleaned frame ...
missing_values = df_cleaned.isna().sum()
# ... and show only the columns that actually have some
missing_values.loc[missing_values.gt(0)]


message_date    1
dtype: int64

In [8]:
# Discard every row whose message_date is missing (one row, per the check above)
cleaned_df = df_cleaned.dropna(axis="index", subset=["message_date"])

In [9]:
# Re-count nulls per column after dropping the incomplete rows ...
missing_values = cleaned_df.isna().sum()
# ... an empty Series here means nothing is missing any more
missing_values.loc[missing_values.gt(0)]


Series([], dtype: int64)

In [10]:
# Save cleaned data to CSV.
# Writes the frame with the missing-date rows removed; this same file is read
# back with pd.read_csv further down, before the database insert.
save_cleaned_data(cleaned_df, "../data/cleaned_telegram_data.csv")

2025-01-30 11:35:41,062 - INFO - ✅ Cleaned data saved successfully to '../data/cleaned_telegram_data.csv'.


✅ Cleaned data saved successfully to '../data/cleaned_telegram_data.csv'.


## Connect to Database

In [11]:
# Obtain the database handle from the project helper; `engine` is then passed to
# create_table, insert_data and pd.read_sql in the cells below.
# NOTE(review): connection settings live inside scripts.database_setup and are
# not visible from this notebook — confirm they are not hard-coded.
engine = get_db_connection()

2025-01-30 11:35:51,745 - INFO - ✅ Successfully connected to the PostgreSQL database.


###  Create Table in PostgreSQL

In [12]:
# Create the destination table via the project helper (the recorded log output
# names it 'telegram_messages').
# NOTE(review): confirm create_table is safe to re-run against an existing table.
create_table(engine)

2025-01-30 11:36:05,012 - INFO - ✅ Table 'telegram_messages' created successfully.


###  Insert Data into Database

In [13]:
# Read the previously saved cleaned CSV back into a DataFrame
cleaned_csv_path = "../data/cleaned_telegram_data.csv"
cleaned_df = pd.read_csv(cleaned_csv_path)

In [14]:
# Parse 'message_date' back into datetimes: pd.read_csv was called without
# parse_dates, so the column comes back from the CSV as text.
# errors="coerce" does NOT prevent NaT — it turns any unparseable value into NaT
# instead of raising — so the result is verified explicitly below.
cleaned_df["message_date"] = pd.to_datetime(cleaned_df["message_date"], errors="coerce")

# Check if there are any missing values before inserting
missing_values = cleaned_df.isnull().sum()
print("Missing Values Before Insert:", missing_values)

# Stop here rather than hand rows with an unparsed date to the insert step
assert missing_values["message_date"] == 0, (
    "message_date contains NaT after parsing; fix the data before inserting"
)

Missing Values Before Insert: channel_title       0
channel_username    0
message_id          0
message             0
message_date        0
media_path          0
emoji_used          0
youtube_links       0
dtype: int64


In [15]:
# Preview the frame that is about to be inserted: `cleaned_df` (reloaded and
# date-parsed), not `df_cleaned`, which is the earlier frame from before the
# missing-date rows were dropped. head() keeps the output short.
cleaned_df.head(10)

Unnamed: 0,channel_title,channel_username,message_id,message,message_date,media_path,emoji_used,youtube_links
0,Doctors Ethiopia,@DoctorsET,864,"በቀን አንዴ ብቻ የሚባለው የቢዝነስ አማካሪ በ 10,000 ብር ብቻ የተ...",2023-12-18 17:04:02+00:00,photos/@DoctorsET_864.jpg,👈👈👇👇,https://youtu.be/5DBoEm-8kmA?si=LDLuEecNfULJVD...
1,Doctors Ethiopia,@DoctorsET,863,ዶክተርስ ኢትዮጵያ በ አዲስ አቀራረብ በ ቴሌቪዥን ፕሮግራሙን ለመጀመር ከ...,2023-11-03 16:14:39+00:00,photos/@DoctorsET_863.jpg,👇,https://youtu.be/gwVN5eJQpko?si=xARsSxIEdZtE91GY
2,Doctors Ethiopia,@DoctorsET,862,ሞት በስኳር ለልጆቻችን የምናሲዘው ምሳቃ ሳናቀው እድሚያቸውን ይቀንሰው ...,2023-10-02 16:37:39+00:00,photos/@DoctorsET_862.jpg,No emoji,https://youtu.be/oHiSRrNF7I0?si=Absgm414YSt_kjNq
3,Doctors Ethiopia,@DoctorsET,861,ከ HIV የተፈወሰ ሰው አጋጥሟችሁ ያቃል ? ፈውስ እና ህክምና ? ሙሉ ቪ...,2023-09-16 07:54:32+00:00,photos/@DoctorsET_861.jpg,👇👇👇👇,https://youtu.be/tTeErZxIh_Q?si=jKHyfWcC3sfXbC8L
4,Doctors Ethiopia,@DoctorsET,860,በቅርብ ጊዜ በሃገራችን ላይ እየተስተዋለ ያለ የተመሳሳይ ፆታ ( Homos...,2023-09-01 16:16:15+00:00,photos/@DoctorsET_860.jpg,No emoji,https://youtu.be/0k65P5ouw7s?si=qaUgo75bUa3AMQxD
5,Doctors Ethiopia,@DoctorsET,859,ዶክተርስ ኢትዮጽያ በአዲስ ፕሮገራም ጀመረ ማረጥ (ሜኖፖዝ ) ጋር ተያይ...,2023-08-29 17:20:05+00:00,photos/@DoctorsET_859.jpg,👇👇👇👇👇👇,https://youtu.be/-AR1KO2DbFw?si=47cXLZtlmhx1Nl...
6,Doctors Ethiopia,@DoctorsET,848,ክረምቱን ስፖርት መስራት አስበው ጂም ለመግባት ካልቻሉ ባሉበት ቦታ ሆነው...,2022-08-02 17:42:08+00:00,photos/@DoctorsET_848.jpg,👇👇👇👇👇,https://youtu.be/0uiTzjEbh90
7,Doctors Ethiopia,@DoctorsET,847,ስፖርት የመስራት ሱስ ይኖር ይሆን? በአሁኑ ወቅት ብዙ የስፖርት መስሪያ ...,2022-06-12 17:15:47+00:00,photos/@DoctorsET_847.jpg,👇👇👇👇👇👇,https://youtu.be/WPlRuRtQXN8
8,Doctors Ethiopia,@DoctorsET,846,ድንገተኛ አደጋ / የአጥንት ስብራት አያርገውና ድንገተኛ የሆነ አደጋ ቢደ...,2022-05-31 17:51:13+00:00,photos/@DoctorsET_846.jpg,👇👇👇👇👇👇👇,https://youtu.be/QI-8oqW80uI
9,Doctors Ethiopia,@DoctorsET,845,ከትንሽ ግዚያት በፊት ስፖርት መስራት እንደ ቅንጦት ይታይ ነበር አሁን ላ...,2022-05-20 18:04:53+00:00,photos/@DoctorsET_845.jpg,👇👇👇👇👇👇,https://youtu.be/_IEWt07bECg


In [16]:
# Insert into the database: hands the reloaded, date-parsed frame to the project
# helper (the recorded log shows one "Inserting: <message_id> - <date>" line per row).
# NOTE(review): re-running this cell may insert the same rows again — confirm how
# insert_data handles message_ids that already exist in the table.
insert_data(engine, cleaned_df)

2025-01-30 11:37:26,261 - INFO - Inserting: 864 - 2023-12-18 17:04:02+00:00
2025-01-30 11:37:26,321 - INFO - Inserting: 863 - 2023-11-03 16:14:39+00:00
2025-01-30 11:37:26,327 - INFO - Inserting: 862 - 2023-10-02 16:37:39+00:00
2025-01-30 11:37:26,335 - INFO - Inserting: 861 - 2023-09-16 07:54:32+00:00
2025-01-30 11:37:26,341 - INFO - Inserting: 860 - 2023-09-01 16:16:15+00:00


2025-01-30 11:37:26,359 - INFO - Inserting: 859 - 2023-08-29 17:20:05+00:00
2025-01-30 11:37:26,370 - INFO - Inserting: 848 - 2022-08-02 17:42:08+00:00
2025-01-30 11:37:26,383 - INFO - Inserting: 847 - 2022-06-12 17:15:47+00:00
2025-01-30 11:37:26,392 - INFO - Inserting: 846 - 2022-05-31 17:51:13+00:00
2025-01-30 11:37:26,404 - INFO - Inserting: 845 - 2022-05-20 18:04:53+00:00
2025-01-30 11:37:26,412 - INFO - Inserting: 844 - 2022-05-15 15:59:10+00:00
2025-01-30 11:37:26,437 - INFO - Inserting: 843 - 2022-05-07 18:22:14+00:00
2025-01-30 11:37:26,443 - INFO - Inserting: 842 - 2022-05-06 17:51:05+00:00
2025-01-30 11:37:26,449 - INFO - Inserting: 841 - 2022-05-06 17:51:05+00:00
2025-01-30 11:37:26,460 - INFO - Inserting: 840 - 2022-05-06 17:51:05+00:00
2025-01-30 11:37:26,478 - INFO - Inserting: 839 - 2022-05-06 17:51:05+00:00
2025-01-30 11:37:26,567 - INFO - Inserting: 836 - 2022-05-01 11:13:33+00:00
2025-01-30 11:37:26,573 - INFO - Inserting: 835 - 2022-04-29 18:59:05+00:00
2025-01-30 1

###  Verify Inserted Data

In [17]:

# Read back five inserted rows as a sanity check.
# ORDER BY id makes the sample deterministic: without an ORDER BY clause,
# PostgreSQL may return an arbitrary subset of rows for a LIMIT query.
query = "SELECT * FROM telegram_messages ORDER BY id LIMIT 5;"
df_pg = pd.read_sql(query, engine)

df_pg


Unnamed: 0,id,channel_title,channel_username,message_id,message,message_date,media_path,emoji_used,youtube_links
0,1,Doctors Ethiopia,@DoctorsET,864,"በቀን አንዴ ብቻ የሚባለው የቢዝነስ አማካሪ በ 10,000 ብር ብቻ የተ...",2023-12-18 17:04:02,photos/@DoctorsET_864.jpg,👈👈👇👇,https://youtu.be/5DBoEm-8kmA?si=LDLuEecNfULJVD...
1,2,Doctors Ethiopia,@DoctorsET,863,ዶክተርስ ኢትዮጵያ በ አዲስ አቀራረብ በ ቴሌቪዥን ፕሮግራሙን ለመጀመር ከ...,2023-11-03 16:14:39,photos/@DoctorsET_863.jpg,👇,https://youtu.be/gwVN5eJQpko?si=xARsSxIEdZtE91GY
2,3,Doctors Ethiopia,@DoctorsET,862,ሞት በስኳር ለልጆቻችን የምናሲዘው ምሳቃ ሳናቀው እድሚያቸውን ይቀንሰው ...,2023-10-02 16:37:39,photos/@DoctorsET_862.jpg,No emoji,https://youtu.be/oHiSRrNF7I0?si=Absgm414YSt_kjNq
3,4,Doctors Ethiopia,@DoctorsET,861,ከ HIV የተፈወሰ ሰው አጋጥሟችሁ ያቃል ? ፈውስ እና ህክምና ? ሙሉ ቪ...,2023-09-16 07:54:32,photos/@DoctorsET_861.jpg,👇👇👇👇,https://youtu.be/tTeErZxIh_Q?si=jKHyfWcC3sfXbC8L
4,5,Doctors Ethiopia,@DoctorsET,860,በቅርብ ጊዜ በሃገራችን ላይ እየተስተዋለ ያለ የተመሳሳይ ፆታ ( Homos...,2023-09-01 16:16:15,photos/@DoctorsET_860.jpg,No emoji,https://youtu.be/0k65P5ouw7s?si=qaUgo75bUa3AMQxD
