In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [73]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
import sqlite3

In [74]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [75]:
df = pd.read_csv('/content/drive/MyDrive/TV_Market_Analysis/data/raw/tv_dataset_cleaned.xls')

In [76]:
df.head()

Unnamed: 0,Name,Price,Display,Features,Connectivity,Design
0,OnePlus Y1S 40 inch (101 cm) LED Full HD TV,"Rs.18,990","40"" (101.6 cm), LED\nFull HD, 1920x1080\n60 Hz...",Apps\nVoice Control\nSmart Remote,Sound Output: 20 W\n2 USB Ports\n2 HDMI Ports,892 x 512.6 x 85.6 mm\nWeight: 5.1 kg\nColour:...
1,TCL 32S5403AF 32 inch (81 cm) LED Full HD TV,"Rs.13,990","32"" (81.28 cm), LED\nFull HD, 1920x1080\n60 Hz...",Apps\nVoice Control\nSmart Remote,Sound Output: 24 W\n1 USB Ports\n2 HDMI Ports,715 x 421 x 80 mm\nWeight: 3.44 kg\nColour: Black
2,TCL 32S5400A 32 inch (81 cm) LED HD-Ready TV,"Rs.9,990","32"" (81.28 cm), LED\nHD-Ready, 1366x768\n60 Hz...",Apps\nVoice Control\nSmart Remote,Sound Output: 24 W\n1 USB Ports\n2 HDMI Ports,715 x 468 x 80 mm\nWeight: 3.55 kg\nColour: Black
3,Sony BRAVIA KD-65X74L 65 inch (165 cm) LED 4K TV,"Rs.74,990","65"" (165.1 cm), LED\n4K, 3840x2160\n50 Hz Refr...",Apps\nVoice Control\nSmart Remote,Sound Output: 20 W\n2 USB Ports\n3 HDMI Ports,1463 x 852 x 87 mm\nWeight: 21.4 kg\nColour: B...
4,Sony BRAVIA KD-43X74K 43 inch (109 cm) LED 4K TV,"Rs.39,990","43"" (109.22 cm), LED\n4K, 3840x2160\n60 Hz Ref...",Apps and Games\nVoice Control\nSmart Remote,Sound Output: 20 W\n2 USB Ports\n3 HDMI Ports,971 x 575 x 77 mm\nWeight: 7.8 kg\nColour: Black


In [77]:
# Create an in-memory SQLite database
conn = sqlite3.connect(':memory:')
cursor = conn.cursor()


# Load the Pandas DataFrame into the SQLite database
df.to_sql('tv_data', conn, index=False, if_exists='replace')

1061

In [78]:
# Define the query to check for missing values in each column
query = '''
SELECT
    SUM(CASE WHEN Name IS NULL THEN 1 ELSE 0 END) AS Name_missing,
    SUM(CASE WHEN Price IS NULL THEN 1 ELSE 0 END) AS Price_missing,
    SUM(CASE WHEN Display IS NULL THEN 1 ELSE 0 END) AS Display_missing,
    SUM(CASE WHEN Features IS NULL THEN 1 ELSE 0 END) AS Features_missing,
    SUM(CASE WHEN Connectivity IS NULL THEN 1 ELSE 0 END) AS Connectivity_missing,
    SUM(CASE WHEN Design IS NULL THEN 1 ELSE 0 END) AS Design_missing
FROM tv_data
'''

# Execute the query and load the result into a Pandas DataFrame
missing_values = pd.read_sql_query(query, conn)
missing_values

Unnamed: 0,Name_missing,Price_missing,Display_missing,Features_missing,Connectivity_missing,Design_missing
0,0,0,0,0,0,0


In [79]:
# Define the query to get the data types of each column
query = '''
PRAGMA table_info(tv_data)
'''

# Execute the query and load the result into a Pandas DataFrame
table_info = pd.read_sql_query(query, conn)
table_info


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,Name,TEXT,0,,0
1,1,Price,TEXT,0,,0
2,2,Display,TEXT,0,,0
3,3,Features,TEXT,0,,0
4,4,Connectivity,TEXT,0,,0
5,5,Design,TEXT,0,,0


View Basic Statistics of the Price Column

In [80]:
# Define the query to get basic statistics of the Price column, with rounded average price
query = '''
SELECT
    MIN(CAST(REPLACE(REPLACE(Price, 'Rs.', ''), ',', '') AS INTEGER)) AS min_price,
    MAX(CAST(REPLACE(REPLACE(Price, 'Rs.', ''), ',', '') AS INTEGER)) AS max_price,
    ROUND(AVG(CAST(REPLACE(REPLACE(Price, 'Rs.', ''), ',', '') AS INTEGER)), 2) AS avg_price
FROM tv_data
'''

# Execute the query and load the result into a Pandas DataFrame
price_statistics = pd.read_sql_query(query, conn)
price_statistics



Unnamed: 0,min_price,max_price,avg_price
0,5190,1999990,89344.21


Standardize the Price Column

In [81]:
# Convert the Price column to INTEGER while maintaining its position
query = '''
CREATE TABLE tv_data_temp AS
SELECT
    Name,
    CAST(REPLACE(REPLACE(Price, 'Rs.', ''), ',', '') AS INTEGER) AS Price,
    Display,
    Features,
    Connectivity,
    Design
FROM tv_data;

DROP TABLE tv_data;

ALTER TABLE tv_data_temp RENAME TO tv_data;
'''

# Execute the query to convert the Price column to INTEGER
cursor.executescript(query)
conn.commit()

# Verify the changes
query = '''
PRAGMA table_info(tv_data);
'''

# Execute the query and load the result into a Pandas DataFrame
updated_table_info = pd.read_sql_query(query, conn)
updated_table_info

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,Name,TEXT,0,,0
1,1,Price,INT,0,,0
2,2,Display,TEXT,0,,0
3,3,Features,TEXT,0,,0
4,4,Connectivity,TEXT,0,,0
5,5,Design,TEXT,0,,0


In [82]:
# Define the query to check for duplicates in the dataset
query = '''
SELECT
    Name,
    COUNT(*) as count
FROM
    tv_data
GROUP BY
    Name, Display, Features, Connectivity, Design, Price
HAVING
    COUNT(*) > 1
'''

# Execute the query and load the result into a Pandas DataFrame
duplicates = pd.read_sql_query(query, conn)
duplicates


Unnamed: 0,Name,count


In [83]:
# Remove leading and trailing spaces from the Name column
query = '''
UPDATE tv_data
SET Name = TRIM(Name);
'''

# Execute the query to remove leading and trailing spaces
cursor.execute(query)
conn.commit()

# Verify the changes
query = '''
SELECT DISTINCT Name
FROM tv_data
LIMIT 5
'''

# Execute the query and load the result into a Pandas DataFrame
cleaned_names = pd.read_sql_query(query, conn)
cleaned_names


Unnamed: 0,Name
0,OnePlus Y1S 40 inch (101 cm) LED Full HD TV
1,TCL 32S5403AF 32 inch (81 cm) LED Full HD TV
2,TCL 32S5400A 32 inch (81 cm) LED HD-Ready TV
3,Sony BRAVIA KD-65X74L 65 inch (165 cm) LED 4K TV
4,Sony BRAVIA KD-43X74K 43 inch (109 cm) LED 4K TV


In [84]:
# Convert the Name column to lower case
query = '''
UPDATE tv_data
SET Name = LOWER(Name);
'''

# Execute the query to convert to lower case
cursor.execute(query)
conn.commit()

# Verify the changes
query = '''
SELECT DISTINCT Name
FROM tv_data
LIMIT 5
'''

# Execute the query and load the result into a Pandas DataFrame
lower_case_names = pd.read_sql_query(query, conn)
lower_case_names


Unnamed: 0,Name
0,oneplus y1s 40 inch (101 cm) led full hd tv
1,tcl 32s5403af 32 inch (81 cm) led full hd tv
2,tcl 32s5400a 32 inch (81 cm) led hd-ready tv
3,sony bravia kd-65x74l 65 inch (165 cm) led 4k tv
4,sony bravia kd-43x74k 43 inch (109 cm) led 4k tv


In [85]:
# Extract the Brand column and place it after the Name column
# Add the new Brand column
query = '''
ALTER TABLE tv_data ADD COLUMN Brand TEXT;
'''

# Execute the query to add the Brand column
cursor.execute(query)
conn.commit()

# Update the Brand column by extracting the first word from the Name column
query = '''
UPDATE tv_data
SET Brand = SUBSTR(Name, 1, INSTR(Name, ' ') - 1);
'''

# Execute the query to populate the Brand column
cursor.execute(query)
conn.commit()

# Verify the changes
query = '''
PRAGMA table_info(tv_data);
'''

# Execute the query and load the result into a Pandas DataFrame
updated_table_info_with_brand = pd.read_sql_query(query, conn)
updated_table_info_with_brand


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,Name,TEXT,0,,0
1,1,Price,INT,0,,0
2,2,Display,TEXT,0,,0
3,3,Features,TEXT,0,,0
4,4,Connectivity,TEXT,0,,0
5,5,Design,TEXT,0,,0
6,6,Brand,TEXT,0,,0


In [86]:
# Create a temporary table with the desired column order
query = '''
CREATE TABLE tv_data_temp AS
SELECT
    Name,
    SUBSTR(Name, 1, INSTR(Name, ' ') - 1) AS Brand,
    Price,
    Display,
    Features,
    Connectivity,
    Design
FROM tv_data;

-- Drop the original table
DROP TABLE tv_data;

-- Rename the temporary table to the original table name
ALTER TABLE tv_data_temp RENAME TO tv_data;
'''

# Execute the queries to reorder columns
cursor.executescript(query)
conn.commit()

# Verify the changes
query = '''
PRAGMA table_info(tv_data);
'''

# Execute the query and load the result into a Pandas DataFrame
updated_table_info_reordered = pd.read_sql_query(query, conn)
updated_table_info_reordered


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,Name,TEXT,0,,0
1,1,Brand,,0,,0
2,2,Price,INT,0,,0
3,3,Display,TEXT,0,,0
4,4,Features,TEXT,0,,0
5,5,Connectivity,TEXT,0,,0
6,6,Design,TEXT,0,,0


In [87]:
# Change the Brand column type to TEXT
query = '''
ALTER TABLE tv_data
RENAME TO tv_data_temp;

CREATE TABLE tv_data (
    Name TEXT,
    Brand TEXT,
    Price INTEGER,
    Display TEXT,
    Features TEXT,
    Connectivity TEXT,
    Design TEXT
);

INSERT INTO tv_data (Name, Brand, Price, Display, Features, Connectivity, Design)
SELECT Name, Brand, Price, Display, Features, Connectivity, Design
FROM tv_data_temp;

DROP TABLE tv_data_temp;
'''

# Execute the query to change the Brand column type to TEXT
cursor.executescript(query)
conn.commit()

# Verify the changes
query = '''
PRAGMA table_info(tv_data);
'''

# Execute the query and load the result into a Pandas DataFrame
updated_table_info_with_text_brand = pd.read_sql_query(query, conn)
updated_table_info_with_text_brand


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,Name,TEXT,0,,0
1,1,Brand,TEXT,0,,0
2,2,Price,INTEGER,0,,0
3,3,Display,TEXT,0,,0
4,4,Features,TEXT,0,,0
5,5,Connectivity,TEXT,0,,0
6,6,Design,TEXT,0,,0


 Extraction of the Model Column

In [88]:
# Create a temporary table with the desired column order and extract the model information
query = '''
CREATE TABLE tv_data_temp AS
SELECT
    Name,
    Brand,
    TRIM(SUBSTR(Name, LENGTH(Brand) + 2, INSTR(Name, ' inch') - LENGTH(Brand) - 3)) AS Model,
    Price,
    Display,
    Features,
    Connectivity,
    Design
FROM tv_data;

-- Drop the original table
DROP TABLE tv_data;

-- Rename the temporary table to the original table name
ALTER TABLE tv_data_temp RENAME TO tv_data;
'''

# Execute the queries to reorder columns and extract the model information
cursor.executescript(query)
conn.commit()

# Verify the changes
query = '''
PRAGMA table_info(tv_data);
'''

# Execute the query and load the result into a Pandas DataFrame
updated_table_info_with_model = pd.read_sql_query(query, conn)
updated_table_info_with_model


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,Name,TEXT,0,,0
1,1,Brand,TEXT,0,,0
2,2,Model,,0,,0
3,3,Price,INT,0,,0
4,4,Display,TEXT,0,,0
5,5,Features,TEXT,0,,0
6,6,Connectivity,TEXT,0,,0
7,7,Design,TEXT,0,,0


In [89]:
# Create a temporary table with the correct column types and desired column order
query = '''
CREATE TABLE tv_data_temp (
    Name TEXT,
    Brand TEXT,
    Model TEXT,
    Price INTEGER,
    Display TEXT,
    Features TEXT,
    Connectivity TEXT,
    Design TEXT
);

-- Copy data from the current table to the temporary table
INSERT INTO tv_data_temp (Name, Brand, Model, Price, Display, Features, Connectivity, Design)
SELECT Name, Brand, Model, Price, Display, Features, Connectivity, Design
FROM tv_data;

-- Drop the original table
DROP TABLE tv_data;

-- Rename the temporary table to the original table name
ALTER TABLE tv_data_temp RENAME TO tv_data;
'''

# Execute the queries to ensure correct column types and reorder columns
cursor.executescript(query)
conn.commit()

# Verify the changes
query = '''
PRAGMA table_info(tv_data);
'''

# Execute the query and load the result into a Pandas DataFrame
final_table_info = pd.read_sql_query(query, conn)
final_table_info


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,Name,TEXT,0,,0
1,1,Brand,TEXT,0,,0
2,2,Model,TEXT,0,,0
3,3,Price,INTEGER,0,,0
4,4,Display,TEXT,0,,0
5,5,Features,TEXT,0,,0
6,6,Connectivity,TEXT,0,,0
7,7,Design,TEXT,0,,0


In [90]:
# Create a temporary table with the desired column order and extract the screen size (in inches)
query = '''
CREATE TABLE tv_data_temp AS
SELECT
    Name,
    Brand,
    Model,
    CAST(SUBSTR(Name, INSTR(Name, 'inch') - 3, 2) AS INTEGER) AS Inches,
    Price,
    Display,
    Features,
    Connectivity,
    Design
FROM tv_data;

-- Drop the original table
DROP TABLE tv_data;

-- Rename the temporary table to the original table name
ALTER TABLE tv_data_temp RENAME TO tv_data;
'''

# Execute the queries to reorder columns and extract the screen size (in inches)
cursor.executescript(query)
conn.commit()

# Verify the changes
query = '''
PRAGMA table_info(tv_data);
'''

# Execute the query and load the result into a Pandas DataFrame
updated_table_info_with_inches = pd.read_sql_query(query, conn)
updated_table_info_with_inches


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,Name,TEXT,0,,0
1,1,Brand,TEXT,0,,0
2,2,Model,TEXT,0,,0
3,3,Inches,INT,0,,0
4,4,Price,INT,0,,0
5,5,Display,TEXT,0,,0
6,6,Features,TEXT,0,,0
7,7,Connectivity,TEXT,0,,0
8,8,Design,TEXT,0,,0


In [91]:
# Create a temporary table with the desired column order and correct column types
query = '''
CREATE TABLE tv_data_temp (
    Name TEXT,
    Brand TEXT,
    Model TEXT,
    Inches INTEGER,
    Price INTEGER,
    Resolution TEXT,
    Display TEXT,
    Features TEXT,
    Connectivity TEXT,
    Design TEXT
);

-- Copy data from the current table to the temporary table
INSERT INTO tv_data_temp (Name, Brand, Model, Inches, Price, Resolution, Display, Features, Connectivity, Design)
SELECT
    Name,
    Brand,
    Model,
    Inches,
    Price,
    TRIM(SUBSTR(Name, INSTR(Name, ')') + 2)) AS Resolution,
    Display,
    Features,
    Connectivity,
    Design
FROM tv_data;

-- Drop the original table
DROP TABLE tv_data;

-- Rename the temporary table to the original table name
ALTER TABLE tv_data_temp RENAME TO tv_data;
'''

# Execute the queries to ensure correct column types and reorder columns
cursor.executescript(query)
conn.commit()

# Verify the changes
query = '''
PRAGMA table_info(tv_data);
'''

# Execute the query and load the result into a Pandas DataFrame
final_table_info_with_resolution = pd.read_sql_query(query, conn)
final_table_info_with_resolution


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,Name,TEXT,0,,0
1,1,Brand,TEXT,0,,0
2,2,Model,TEXT,0,,0
3,3,Inches,INTEGER,0,,0
4,4,Price,INTEGER,0,,0
5,5,Resolution,TEXT,0,,0
6,6,Display,TEXT,0,,0
7,7,Features,TEXT,0,,0
8,8,Connectivity,TEXT,0,,0
9,9,Design,TEXT,0,,0


In [102]:
# Query to select the first 5 rows of the updated table
query = '''
SELECT *
FROM tv_data
LIMIT 5
'''

# Execute the query and load the result into a Pandas DataFrame
first_five_rows_updated = pd.read_sql_query(query, conn)
first_five_rows_updated


Unnamed: 0,Name,Brand,Model,Inches,Price,DisplayType,Display,Resolution,Features,Connectivity,Design
0,oneplus y1s 40 inch (101 cm) led full hd tv,oneplus,y1s 4,40,18990,led full hd tv,"40"" (101.6 cm), LED\nFull HD, 1920x1080\n60 Hz...",1920x1080,Apps\nVoice Control\nSmart Remote,Sound Output: 20 W\n2 USB Ports\n2 HDMI Ports,892 x 512.6 x 85.6 mm\nWeight: 5.1 kg\nColour:...
1,tcl 32s5403af 32 inch (81 cm) led full hd tv,tcl,32s5403af 3,32,13990,led full hd tv,"32"" (81.28 cm), LED\nFull HD, 1920x1080\n60 Hz...",1920x1080,Apps\nVoice Control\nSmart Remote,Sound Output: 24 W\n1 USB Ports\n2 HDMI Ports,715 x 421 x 80 mm\nWeight: 3.44 kg\nColour: Black
2,tcl 32s5400a 32 inch (81 cm) led hd-ready tv,tcl,32s5400a 3,32,9990,led hd-ready tv,"32"" (81.28 cm), LED\nHD-Ready, 1366x768\n60 Hz...",1366x768,Apps\nVoice Control\nSmart Remote,Sound Output: 24 W\n1 USB Ports\n2 HDMI Ports,715 x 468 x 80 mm\nWeight: 3.55 kg\nColour: Black
3,sony bravia kd-65x74l 65 inch (165 cm) led 4k tv,sony,bravia kd-65x74l 6,65,74990,led 4k tv,"65"" (165.1 cm), LED\n4K, 3840x2160\n50 Hz Refr...",3840x2160,Apps\nVoice Control\nSmart Remote,Sound Output: 20 W\n2 USB Ports\n3 HDMI Ports,1463 x 852 x 87 mm\nWeight: 21.4 kg\nColour: B...
4,sony bravia kd-43x74k 43 inch (109 cm) led 4k tv,sony,bravia kd-43x74k 4,43,39990,led 4k tv,"43"" (109.22 cm), LED\n4K, 3840x2160\n60 Hz Ref...",3840x2160,Apps and Games\nVoice Control\nSmart Remote,Sound Output: 20 W\n2 USB Ports\n3 HDMI Ports,971 x 575 x 77 mm\nWeight: 7.8 kg\nColour: Black


In [105]:
# Get the list of columns and their data types
query1 = "PRAGMA table_info(tv_data)"
columns_info = pd.read_sql(query1, conn)
columns_info

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,Name,TEXT,0,,0
1,1,Brand,TEXT,0,,0
2,2,Model,TEXT,0,,0
3,3,Inches,INTEGER,0,,0
4,4,Price,INTEGER,0,,0
5,5,DisplayType,TEXT,0,,0
6,6,Display,TEXT,0,,0
7,7,Resolution,TEXT,0,,0
8,8,Features,TEXT,0,,0
9,9,Connectivity,TEXT,0,,0


In [106]:
# Rename the existing Resolution column to DisplayType
rename_column_query = "ALTER TABLE tv_data RENAME COLUMN Resolution TO DisplayType"
cursor.execute(rename_column_query)

# Confirm the column rename
query_columns_info = "PRAGMA table_info(tv_data)"
updated_columns_info = pd.read_sql(query_columns_info, conn)



updated_columns_info


OperationalError: error in table tv_data after rename: duplicate column name: DisplayType

In [107]:
# Add the ActualResolution column
cursor.execute("ALTER TABLE tv_data ADD COLUMN ActualResolution TEXT")

# Extract the resolution using a general pattern
extract_resolution_query = """
UPDATE tv_data
SET ActualResolution = TRIM(SUBSTR(Display, INSTR(Display, 'x')-4, 9))
WHERE INSTR(Display, 'x') > 0;
"""
cursor.execute(extract_resolution_query)

# Confirm the extraction by displaying the first 5 rows of the updated table
query_actual_resolution = "SELECT Display, ActualResolution FROM tv_data LIMIT 5"
actual_resolution_first_5_rows = pd.read_sql(query_actual_resolution, conn)


actual_resolution_first_5_rows


Unnamed: 0,Display,ActualResolution
0,"40"" (101.6 cm), LED\nFull HD, 1920x1080\n60 Hz...",1920x1080
1,"32"" (81.28 cm), LED\nFull HD, 1920x1080\n60 Hz...",1920x1080
2,"32"" (81.28 cm), LED\nHD-Ready, 1366x768\n60 Hz...",1366x768\n
3,"65"" (165.1 cm), LED\n4K, 3840x2160\n50 Hz Refr...",3840x2160
4,"43"" (109.22 cm), LED\n4K, 3840x2160\n60 Hz Ref...",3840x2160


In [108]:
# Clean the ActualResolution column
clean_actual_resolution_query = """
UPDATE tv_data
SET ActualResolution = TRIM(REPLACE(ActualResolution, '\n', ''));
"""
cursor.execute(clean_actual_resolution_query)

# Confirm the cleaning by displaying the first 5 rows of the updated table
query_cleaned_actual_resolution = "SELECT Display, ActualResolution FROM tv_data LIMIT 5"
cleaned_actual_resolution_first_5_rows = pd.read_sql(query_cleaned_actual_resolution, conn)

cleaned_actual_resolution_first_5_rows


Unnamed: 0,Display,ActualResolution
0,"40"" (101.6 cm), LED\nFull HD, 1920x1080\n60 Hz...",1920x1080
1,"32"" (81.28 cm), LED\nFull HD, 1920x1080\n60 Hz...",1920x1080
2,"32"" (81.28 cm), LED\nHD-Ready, 1366x768\n60 Hz...",1366x768
3,"65"" (165.1 cm), LED\n4K, 3840x2160\n50 Hz Refr...",3840x2160
4,"43"" (109.22 cm), LED\n4K, 3840x2160\n60 Hz Ref...",3840x2160


In [109]:
# Get the list of columns and their data types
query = "PRAGMA table_info(tv_data)"
columns_info = pd.read_sql(query, conn)
columns_info

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,Name,TEXT,0,,0
1,1,Brand,TEXT,0,,0
2,2,Model,TEXT,0,,0
3,3,Inches,INTEGER,0,,0
4,4,Price,INTEGER,0,,0
5,5,DisplayType,TEXT,0,,0
6,6,Display,TEXT,0,,0
7,7,Resolution,TEXT,0,,0
8,8,Features,TEXT,0,,0
9,9,Connectivity,TEXT,0,,0


In [110]:
# Rename the ActualResolution column to Resolution and place it after the Display column
# Since SQLite doesn't support direct column reordering, we'll create a new table with the desired schema and copy the data over.

# Create a new table with the desired column order
create_new_table_query = """
CREATE TABLE tv_data_new (
    Name TEXT,
    Brand TEXT,
    Model TEXT,
    Inches INTEGER,
    Price INTEGER,
    DisplayType TEXT,
    Display TEXT,
    Resolution TEXT,
    Features TEXT,
    Connectivity TEXT,
    Design TEXT
);
"""
cursor.execute(create_new_table_query)

# Copy the data from the old table to the new table
copy_data_query = """
INSERT INTO tv_data_new (Name, Brand, Model, Inches, Price, DisplayType, Display, Resolution, Features, Connectivity, Design)
SELECT Name, Brand, Model, Inches, Price, DisplayType, Display, ActualResolution, Features, Connectivity, Design
FROM tv_data;
"""
cursor.execute(copy_data_query)

# Drop the old table
drop_old_table_query = "DROP TABLE tv_data"
cursor.execute(drop_old_table_query)

# Rename the new table to the original table name
rename_new_table_query = "ALTER TABLE tv_data_new RENAME TO tv_data"
cursor.execute(rename_new_table_query)

# Confirm the column rename and reordering
query_columns_info_final = "PRAGMA table_info(tv_data)"
final_columns_info = pd.read_sql(query_columns_info_final, conn)


final_columns_info


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,Name,TEXT,0,,0
1,1,Brand,TEXT,0,,0
2,2,Model,TEXT,0,,0
3,3,Inches,INTEGER,0,,0
4,4,Price,INTEGER,0,,0
5,5,DisplayType,TEXT,0,,0
6,6,Display,TEXT,0,,0
7,7,Resolution,TEXT,0,,0
8,8,Features,TEXT,0,,0
9,9,Connectivity,TEXT,0,,0


In [111]:
# Query to get 5 random rows of the dataset
query_random_5_rows = "SELECT * FROM tv_data ORDER BY RANDOM() LIMIT 5"
random_5_rows = pd.read_sql(query_random_5_rows, conn)

random_5_rows

Unnamed: 0,Name,Brand,Model,Inches,Price,DisplayType,Display,Resolution,Features,Connectivity,Design
0,jvc lt-65nq7115cgx 65 inch (165 cm) led 4k tv,jvc,lt-65nq7115cgx 6,65,67490,led 4k tv,"65"" (165.1 cm), LED\n4K, 3840x2160\n60 Hz Refr...",3840x2160,Apps\nVoice Control,Sound Output: 24 W\n2 USB Ports\n4 HDMI Ports,Colour: Black
1,tcl 75p635 75 inch (190 cm) led 4k tv,tcl,75p635 7,75,69990,led 4k tv,"75"" (190.5 cm), LED\n4K, 3840x2160\n60 Hz Refr...",3840x2160,Apps\nSmart Remote,Sound Output: 20 W\n2 USB Ports\n3 HDMI Ports,1446 x 833 x 81 mm\nWeight: 10.5 kg\nColour: B...
2,samsung qa75qn90dau 75 inch (190 cm) neo qled ...,samsung,qa75qn90dau 7,75,449990,neo qled 4k tv,"75"" (190.5 cm), Neo QLED\n4K, 3840x2160\n100 H...",3840x2160,Apps and Games\nWeb Browser\nVoice Control,Sound Output: 60 W\n2 USB Ports\n4 HDMI Ports,16.7 x 957.4 x 27.7 mm\nWeight: 34.1 kg\nColou...
3,tcl 55c755 55 inch (139 cm) qd-mini led 4k tv,tcl,55c755 5,55,79990,qd-mini led 4k tv,"55"" (139.7 cm), QD-Mini LED\n4K, 3840x2160\n14...",3840x2160,Apps\nVoice Control\nSmart Remote,Sound Output: 30 W\n2 USB Ports\n4 HDMI Ports,1224 x 710 x 70 mm\nWeight: 13 kg\nColour: Black
4,lg 55uq8040psb 55 inch (139 cm) led 4k tv,lg,55uq8040psb 5,55,50999,led 4k tv,"55"" (139.7 cm), LED\n4K, 3840x2160\n60 Hz Refr...",3840x2160,Apps,Sound Output: 20 W\n3 HDMI Ports,1235 x 715 x 57.5 mm\nWeight: 14 kg\nColour: B...


In [112]:
# Query to get the value counts of the Features column
query_features_value_counts = "SELECT Features, COUNT(*) as count FROM tv_data GROUP BY Features ORDER BY count DESC"
features_value_counts = pd.read_sql(query_features_value_counts, conn)

features_value_counts

Unnamed: 0,Features,count
0,Apps and Games\nWeb Browser\nVoice Control,190
1,Apps\nVoice Control\nSmart Remote,147
2,Apps,128
3,,81
4,Apps\nVoice Control,67
5,Apps and Games\nWeb Browser\nFacebook,55
6,Apps\nSmart Remote,55
7,and Games\nVoice Control\nSmart Remote,35
8,and Games\nWeb Browser\nVoice Control,31
9,Apps and Games\nVoice Control\nSmart Remote,31


In [113]:
# Update the rows with NaN in the Features column where the text starts with "Sound Output"
update_features_nan_query = """
UPDATE tv_data
SET Features = NULL
WHERE Features LIKE 'Sound Output%';
"""
cursor.execute(update_features_nan_query)

# Confirm the update by selecting the rows where Features is NULL
query_confirm_update = """
SELECT *
FROM tv_data
WHERE Features IS NULL;
"""
features_nan_confirm = pd.read_sql(query_confirm_update, conn)


features_nan_confirm



Unnamed: 0,Name,Brand,Model,Inches,Price,DisplayType,Display,Resolution,Features,Connectivity,Design
0,marq 32hdndqee1b 32 inch (81 cm) led hd-ready tv,marq,32hdndqee1b 3,32,7449,led hd-ready tv,"32"" (81.28 cm), LED\nHD-Ready, 1366x768\n60 Hz...",1366x768,,"2 USB Ports, 2 HDMI Ports, Ethernet, Wi-Fi",Colour: Black
1,dyanora dy-ld32h1n 32 inch (81 cm) led hd-read...,dyanora,dy-ld32h1n 3,32,7999,led hd-ready tv,"32"" (81.28 cm), LED\nHD-Ready, 1366x768\n60 Hz...",1366x768,,"2 USB Ports, 3 HDMI Ports, Wi-Fi, Bluetooth",718 x 420 x 80 mm\nColour: Black
2,beethosol ledatbg32hdek 32 inch (81 cm) led hd...,beethosol,ledatbg32hdek 3,32,7399,led hd-ready tv,"32"" (81.28 cm), LED\nHD-Ready, 1366x768\n60 Hz...",1366x768,,"2 USB Ports, 3 HDMI Ports, Wi-Fi, Bluetooth",Colour: Grey
3,sansui jsw65asuhdff 65 inch (165 cm) led 4k tv,sansui,jsw65asuhdff 6,65,51999,led 4k tv,"65"" (165.1 cm), LED\n4K, 3840x2160\n60 Hz Refr...",3840x2160,,"3 USB Ports, 4 HDMI Ports, Ethernet, Wi-Fi",Colour: Black
4,adsun a-3200n 32 inch (81 cm) led hd-ready tv,adsun,a-3200n 3,32,6789,led hd-ready tv,"32"" (81.28 cm), LED\nHD-Ready, 1366x768\n60 Hz...",1366x768,,"2 USB Ports, 2 HDMI Ports, Wi-Fi",Colour: Black
5,adsun a-3200f/n 32 inch (81 cm) led hd-ready tv,adsun,a-3200f/n 3,32,7200,led hd-ready tv,"32"" (81.28 cm), LED\nHD-Ready, 1366x768\n60 Hz...",1366x768,,"2 USB Ports, 2 HDMI Ports","73x6.99xN/A mm, Weight: N/A kg, Colour: Black"
6,beethosol ledatvbg2483hd17-tp 24 inch (60 cm) ...,beethosol,ledatvbg2483hd17-tp 2,24,5499,led hd-ready tv,"24"" (60.96 cm), LED\nHD-Ready, 1366x768\n60 Hz...",1366x768,,"1 USB Port, 1 HDMI Port",Colour: Black
7,samsung ua32t4010ar 32 inch (81 cm) led hd-rea...,samsung,ua32t4010ar 3,32,16290,led hd-ready tv,"32"" (81.28 cm), LED\nHD-Ready, 1366x768\n60 Hz...",1366x768,,"2 USB Ports, 2 HDMI Ports, Wi-Fi",731.7 x 439.2 x 83.0 mm\nWeight: 3.7 kg\nColou...
8,samsung qa43q60cak 43 inch (109 cm) qled 4k tv,samsung,qa43q60cak 4,43,62990,qled 4k tv,"43"" (109.22 cm), QLED\n4K, 3840x2160",3840x2160,,"2 USB Ports, 3 HDMI Ports, Wi-Fi, Ethernet",965.5 x 559.4 x 25.7 mm\nWeight: 8.1 kg\nColou...
9,samsung ua50cu8000k 50 inch (127 cm) led 4k tv,samsung,ua50cu8000k 5,50,55290,led 4k tv,"50"" (127 cm), LED\n4K, 3840x2160\n50 Hz Refres...",3840x2160,,"3 USB Ports, 4 HDMI Ports, Wi-Fi, Ethernet",1118.3 x 644.6 x 25.7 mm\nWeight: 11.2 kg\nCol...


In [114]:
# Query to check for null values in each column after the update
query_null_values_after_update = """
SELECT
    SUM(CASE WHEN Name IS NULL THEN 1 ELSE 0 END) AS Name_nulls,
    SUM(CASE WHEN Brand IS NULL THEN 1 ELSE 0 END) AS Brand_nulls,
    SUM(CASE WHEN Model IS NULL THEN 1 ELSE 0 END) AS Model_nulls,
    SUM(CASE WHEN Inches IS NULL THEN 1 ELSE 0 END) AS Inches_nulls,
    SUM(CASE WHEN Price IS NULL THEN 1 ELSE 0 END) AS Price_nulls,
    SUM(CASE WHEN DisplayType IS NULL THEN 1 ELSE 0 END) AS DisplayType_nulls,
    SUM(CASE WHEN Display IS NULL THEN 1 ELSE 0 END) AS Display_nulls,
    SUM(CASE WHEN Resolution IS NULL THEN 1 ELSE 0 END) AS Resolution_nulls,
    SUM(CASE WHEN Features IS NULL THEN 1 ELSE 0 END) AS Features_nulls,
    SUM(CASE WHEN Connectivity IS NULL THEN 1 ELSE 0 END) AS Connectivity_nulls,
    SUM(CASE WHEN Design IS NULL THEN 1 ELSE 0 END) AS Design_nulls
FROM tv_data;
"""
null_values_after_update = pd.read_sql(query_null_values_after_update, conn)


null_values_after_update


Unnamed: 0,Name_nulls,Brand_nulls,Model_nulls,Inches_nulls,Price_nulls,DisplayType_nulls,Display_nulls,Resolution_nulls,Features_nulls,Connectivity_nulls,Design_nulls
0,0,0,0,0,0,0,0,0,81,0,0


In [116]:
# Step 1: Create a new table without the Features and Connectivity columns
query_create_new_table = """
CREATE TABLE tv_data_new AS
SELECT Name, Brand, Model, Inches, Price, DisplayType, Display, Resolution, Design
FROM tv_data;
"""
conn.execute(query_create_new_table)

# Verify the creation of the new table by listing all columns
query_verify_new_table = "PRAGMA table_info(tv_data_new);"
new_table_columns = conn.execute(query_verify_new_table).fetchall()

# Step 2: Drop the old table
query_drop_old_table = "DROP TABLE tv_data;"
conn.execute(query_drop_old_table)

# Step 3: Rename the new table to the original table name
query_rename_table = "ALTER TABLE tv_data_new RENAME TO tv_data;"
conn.execute(query_rename_table)

# Verify the new table structure
query_verify_final_table = "PRAGMA table_info(tv_data);"
final_table_columns = conn.execute(query_verify_final_table).fetchall()

new_table_columns


OperationalError: no such column: Name

In [None]:
final_table_columns

[(0, 'Name', 'TEXT', 0, None, 0),
 (1, 'Brand', 'TEXT', 0, None, 0),
 (2, 'Model', 'TEXT', 0, None, 0),
 (3, 'Inches', 'TEXT', 0, None, 0),
 (4, 'Price', 'TEXT', 0, None, 0),
 (5, 'DisplayType', 'TEXT', 0, None, 0),
 (6, 'Display', 'TEXT', 0, None, 0),
 (7, 'Resolution', 'TEXT', 0, None, 0),
 (8, 'Design', 'TEXT', 0, None, 0)]

In [None]:
# Query to display the PRAGMA table info of the tv_data table
query_pragma = "PRAGMA table_info(tv_data);"
pragma_info = conn.execute(query_pragma).fetchall()

pragma_info


[(0, 'Name', 'TEXT', 0, None, 0),
 (1, 'Brand', 'TEXT', 0, None, 0),
 (2, 'Model', 'TEXT', 0, None, 0),
 (3, 'Inches', 'TEXT', 0, None, 0),
 (4, 'Price', 'TEXT', 0, None, 0),
 (5, 'DisplayType', 'TEXT', 0, None, 0),
 (6, 'Display', 'TEXT', 0, None, 0),
 (7, 'Resolution', 'TEXT', 0, None, 0),
 (8, 'Design', 'TEXT', 0, None, 0)]

In [None]:

# Query to get the PRAGMA table info of the tv_data table
query_pragma = "PRAGMA table_info(tv_data);"
pragma_info = conn.execute(query_pragma).fetchall()

# Convert PRAGMA info to DataFrame
pragma_df = pd.DataFrame(pragma_info, columns=["cid", "name", "type", "notnull", "dflt_value", "pk"])


pragma_df


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,Name,TEXT,0,,0
1,1,Brand,TEXT,0,,0
2,2,Model,TEXT,0,,0
3,3,Inches,TEXT,0,,0
4,4,Price,TEXT,0,,0
5,5,DisplayType,TEXT,0,,0
6,6,Display,TEXT,0,,0
7,7,Resolution,TEXT,0,,0
8,8,Design,TEXT,0,,0


In [None]:
# Query to get the first 5 rows of the Design column
query_first_five_design = "SELECT Design FROM tv_data LIMIT 5;"
first_five_design = conn.execute(query_first_five_design).fetchall()

# Query to get the last 5 rows of the Design column
query_last_five_design = "SELECT Design FROM tv_data ORDER BY rowid DESC LIMIT 5;"
last_five_design = conn.execute(query_last_five_design).fetchall()

# Query to get the value counts of the Design column
query_value_counts_design = "SELECT Design, COUNT(*) as count FROM tv_data GROUP BY Design ORDER BY count DESC;"
value_counts_design = conn.execute(query_value_counts_design).fetchall()

first_five_design


[('892 x 512.6 x 85.6 mm\nWeight: 5.1 kg\nColour: Black',),
 ('715 x 421 x 80 mm\nWeight: 3.44 kg\nColour: Black',),
 ('715 x 468 x 80 mm\nWeight: 3.55 kg\nColour: Black',),
 ('1463 x 852 x 87 mm\nWeight: 21.4 kg\nColour: Black',),
 ('971 x 575 x 77 mm\nWeight: 7.8 kg\nColour: Black',)]

In [None]:
last_five_design

[('715.7 x 421.9 x 79.3 mm\nColour: Black',),
 ('1225.8 x 709.1 x 86.1 mm\nColour: Grey',),
 ('1456.1 x 840.4 x 54.4 mm\nWeight: 23.2 kg\nColour: Black',),
 ('1693 x 978 x 88.8 mm\nWeight: 34.6 kg\nColour: Black',),
 ('732 x 436 x 85 mm\nWeight: 4.1 kg\nColour: Black',)]

In [None]:
value_counts_design

[('Colour: Black', 201),
 ('Colour: Grey', 13),
 ('Colour: Silver', 8),
 ('967 x 564 x 57.1 mm\nWeight: 8.8 kg\nColour: Black', 7),
 ('971 x 575 x 77 mm\nWeight: 7.8 kg\nColour: Black', 5),
 ('1452.9 x 834 x 60.6 mm\nWeight: 15.9 kg\nColour: Black', 5),
 ('1235 x 715 x 57.5 mm\nWeight: 14 kg\nColour: Black', 5),
 ('1121 x 651 x 57.1 mm\nWeight: 11.7 kg\nColour: Black', 5),
 ('970 x 550 x 100 mm\nColour: Black', 4),
 ('97.1x57.5x7.7 cm, Weight: 7.8 kg, Colour: Black', 4),
 ('954 x 540 x 90 mm\nColour: Black', 4),
 ('1244 x 726 x 87.1 mm\nWeight: 14.3 kg\nColour: Black', 4),
 ('1228 x 706 x 46.9 mm\nWeight: 18.9 kg\nColour: Black', 4),
 ('1127 x 662 x 77 mm\nWeight: 9.8 kg\nColour: Black', 4),
 ('Colour: White', 3),
 ('970 x 570 x 120 mm\nColour: Black', 3),
 ('967.5 x 561.4 x 59.7 mm\nWeight: 6.8 kg\nColour: Black', 3),
 ('965.5 x 559.4 x 25.7 mm\nWeight: 8.1 kg\nColour: Black', 3),
 ('935 x 440 x 110 mm\nColour: Black', 3),
 ('925 x 545 x 110 mm\nColour: Black', 3),
 ('735 x 440 x 110 

In [None]:
# Add new columns for Weight and Colour
conn.execute("ALTER TABLE tv_data ADD COLUMN Weight TEXT;")
conn.execute("ALTER TABLE tv_data ADD COLUMN Colour TEXT;")

# Update Weight column
conn.execute("""
    UPDATE tv_data
    SET Weight = TRIM(SUBSTR(Design, INSTR(Design, 'Weight: ') + 8, INSTR(Design, ' kg') - INSTR(Design, 'Weight: ') - 8))
""")

# Update Colour column
conn.execute("""
    UPDATE tv_data
    SET Colour = TRIM(SUBSTR(Design, INSTR(Design, 'Colour: ') + 8))
""")

# Verify the updates by fetching the first 5 rows
query_verify_updates = "SELECT Name, Weight, Colour FROM tv_data LIMIT 5;"
updated_rows = conn.execute(query_verify_updates).fetchall()

updated_rows


[('oneplus y1s 40 inch (101 cm) led full hd tv', '5.1', 'Black'),
 ('tcl 32s5403af 32 inch (81 cm) led full hd tv', '3.44', 'Black'),
 ('tcl 32s5400a 32 inch (81 cm) led hd-ready tv', '3.55', 'Black'),
 ('sony bravia kd-65x74l 65 inch (165 cm) led 4k tv', '21.4', 'Black'),
 ('sony bravia kd-43x74k 43 inch (109 cm) led 4k tv', '7.8', 'Black')]

In [None]:
# Query to display the PRAGMA table info of the tv_data table
query_pragma = "PRAGMA table_info(tv_data);"
pragma_info = conn.execute(query_pragma).fetchall()

# Convert PRAGMA info to DataFrame
pragma_df = pd.DataFrame(pragma_info, columns=["cid", "name", "type", "notnull", "dflt_value", "pk"])


pragma_df


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,Name,TEXT,0,,0
1,1,Brand,TEXT,0,,0
2,2,Model,TEXT,0,,0
3,3,Inches,TEXT,0,,0
4,4,Price,TEXT,0,,0
5,5,DisplayType,TEXT,0,,0
6,6,Display,TEXT,0,,0
7,7,Resolution,TEXT,0,,0
8,8,Design,TEXT,0,,0
9,9,Weight,TEXT,0,,0


In [None]:
# Query to get the first 5 rows of the table
query_first_five = "SELECT * FROM tv_data LIMIT 5;"
first_five_df = pd.read_sql_query(query_first_five, conn)

# Query to get the last 5 rows of the table
query_last_five = "SELECT * FROM tv_data ORDER BY rowid DESC LIMIT 5;"
last_five_df = pd.read_sql_query(query_last_five, conn)

# Query to get 5 random rows of the table
query_random_five = "SELECT * FROM tv_data ORDER BY RANDOM() LIMIT 5;"
random_five_df = pd.read_sql_query(query_random_five, conn)


first_five_df



Unnamed: 0,Name,Brand,Model,Inches,Price,DisplayType,Display,Resolution,Design,Weight,Colour
0,oneplus y1s 40 inch (101 cm) led full hd tv,oneplus,y1s 4,40,18990,led full hd tv,"40"" (101.6 cm), LED\nFull HD, 1920x1080\n60 Hz...",1920x1080,892 x 512.6 x 85.6 mm\nWeight: 5.1 kg\nColour:...,5.1,Black
1,tcl 32s5403af 32 inch (81 cm) led full hd tv,tcl,32s5403af 3,32,13990,led full hd tv,"32"" (81.28 cm), LED\nFull HD, 1920x1080\n60 Hz...",1920x1080,715 x 421 x 80 mm\nWeight: 3.44 kg\nColour: Black,3.44,Black
2,tcl 32s5400a 32 inch (81 cm) led hd-ready tv,tcl,32s5400a 3,32,9990,led hd-ready tv,"32"" (81.28 cm), LED\nHD-Ready, 1366x768\n60 Hz...",1366x768,715 x 468 x 80 mm\nWeight: 3.55 kg\nColour: Black,3.55,Black
3,sony bravia kd-65x74l 65 inch (165 cm) led 4k tv,sony,bravia kd-65x74l 6,65,74990,led 4k tv,"65"" (165.1 cm), LED\n4K, 3840x2160\n50 Hz Refr...",3840x2160,1463 x 852 x 87 mm\nWeight: 21.4 kg\nColour: B...,21.4,Black
4,sony bravia kd-43x74k 43 inch (109 cm) led 4k tv,sony,bravia kd-43x74k 4,43,39990,led 4k tv,"43"" (109.22 cm), LED\n4K, 3840x2160\n60 Hz Ref...",3840x2160,971 x 575 x 77 mm\nWeight: 7.8 kg\nColour: Black,7.8,Black


In [None]:
last_five_df

Unnamed: 0,Name,Brand,Model,Inches,Price,DisplayType,Display,Resolution,Design,Weight,Colour
0,haier le32k8200gt 32 inch (81 cm) led hd-ready tv,haier,le32k8200gt 3,32,14490,led hd-ready tv,"32"" (81.28 cm), LED\nHD-Ready, 1366x768",1366x768,715.7 x 421.9 x 79.3 mm\nColour: Black,715.7 x,Black
1,haier 55p7gt 55 inch (139 cm) led 4k tv,haier,55p7gt 5,55,42750,led 4k tv,"55"" (139.7 cm), LED\n4K, 3840x2160\n60 Hz Refr...",3840x2160,1225.8 x 709.1 x 86.1 mm\nColour: Grey,1225.8,Grey
2,samsung ua65mu6470u 65 inch (165 cm) led 4k tv,samsung,ua65mu6470u 6,65,293900,led 4k tv,"65"" (165.1 cm), LED\n4K, 3840x2160",3840x2160,1456.1 x 840.4 x 54.4 mm\nWeight: 23.2 kg\nCol...,23.2,Black
3,lg 75un8000ptb 75 inch (190 cm) led 4k tv,lg,75un8000ptb 7,75,119990,led 4k tv,"75"" (190.5 cm), LED\n4K, 3840x2160\n50 Hz Refr...",3840x2160,1693 x 978 x 88.8 mm\nWeight: 34.6 kg\nColour:...,34.6,Black
4,lloyd l32hb250b 32 inch (81 cm) led hd-ready tv,lloyd,l32hb250b 3,32,14799,led hd-ready tv,"32"" (81.28 cm), LED\nHD-Ready, 1336x768",1336x768,732 x 436 x 85 mm\nWeight: 4.1 kg\nColour: Black,4.1,Black


In [None]:
random_five_df

Unnamed: 0,Name,Brand,Model,Inches,Price,DisplayType,Display,Resolution,Design,Weight,Colour
0,samsung qa55qn85cak 55 inch (139 cm) neo qled ...,samsung,qa55qn85cak 5,55,159990,neo qled 4k tv,"55"" (139.7 cm), Neo QLED\n4K, 3840x2160\n120 H...",3840x2160,1227 x 706 x 26 mm\nColour: Black,1227 x,Black
1,motorola 43uhdadmrs3p 43 inch (109 cm) led 4k tv,motorola,43uhdadmrs3p 4,43,29999,led 4k tv,"43"" (109.22 cm), LED\n4K, 3840x2160\n60 Hz Ref...",3840x2160,959.7 x 562.6 x 75.8 mm\nWeight: 10.5 kg\nColo...,10.5,Black
2,lg 43ut90506lb 43 inch (109 cm) led 4k tv,lg,43ut90506lb 4,43,54990,led 4k tv,"43"" (109.22 cm), LED\n4K, 3840x2160\n60 Hz Ref...",3840x2160,968 x 565 x 29.7 mm\nWeight: 9.3 kg\nColour: B...,9.3,Black
3,samsung qa50q60dau 50 inch (127 cm) qled 4k tv,samsung,qa50q60dau 5,50,64990,qled 4k tv,"50"" (127 cm), QLED\n4K, 3840x2160\n100 Hz Refr...",3840x2160,11.1 x 644.1 x 25.7 mm\nWeight: 11.2 kg\nColou...,11.2,Black
4,tcl 65p735 65 inch (165 cm) qled 4k tv,tcl,65p735 6,65,65990,qled 4k tv,"65"" (165.1 cm), QLED\n4K, 3840x2160\n60 Hz Ref...",3840x2160,1445 x 833 x 81 mm\nWeight: 14 kg\nColour: Black,14,Black


In [None]:
# Remove the Weight column from the tv_data table
conn.execute("ALTER TABLE tv_data DROP COLUMN Weight;")

# Verify the table structure to confirm the removal of the Weight column
query_verify_table_structure = "PRAGMA table_info(tv_data);"
table_structure_df = pd.read_sql_query(query_verify_table_structure, conn)


table_structure_df



Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,Name,TEXT,0,,0
1,1,Brand,TEXT,0,,0
2,2,Model,TEXT,0,,0
3,3,Inches,TEXT,0,,0
4,4,Price,TEXT,0,,0
5,5,DisplayType,TEXT,0,,0
6,6,Display,TEXT,0,,0
7,7,Resolution,TEXT,0,,0
8,8,Design,TEXT,0,,0
9,9,Colour,TEXT,0,,0


In [None]:
# Query to get the first 5 rows of the table
query_first_five = "SELECT * FROM tv_data LIMIT 5;"
first_five_df = pd.read_sql_query(query_first_five, conn)

# Query to get the last 5 rows of the table
query_last_five = "SELECT * FROM tv_data ORDER BY rowid DESC LIMIT 5;"
last_five_df = pd.read_sql_query(query_last_five, conn)

# Query to get 5 random rows of the table
query_random_five = "SELECT * FROM tv_data ORDER BY RANDOM() LIMIT 5;"
random_five_df = pd.read_sql_query(query_random_five, conn)


first_five_df


Unnamed: 0,Name,Brand,Model,Inches,Price,DisplayType,Display,Resolution,Design,Colour
0,oneplus y1s 40 inch (101 cm) led full hd tv,oneplus,y1s 4,40,18990,led full hd tv,"40"" (101.6 cm), LED\nFull HD, 1920x1080\n60 Hz...",1920x1080,892 x 512.6 x 85.6 mm\nWeight: 5.1 kg\nColour:...,Black
1,tcl 32s5403af 32 inch (81 cm) led full hd tv,tcl,32s5403af 3,32,13990,led full hd tv,"32"" (81.28 cm), LED\nFull HD, 1920x1080\n60 Hz...",1920x1080,715 x 421 x 80 mm\nWeight: 3.44 kg\nColour: Black,Black
2,tcl 32s5400a 32 inch (81 cm) led hd-ready tv,tcl,32s5400a 3,32,9990,led hd-ready tv,"32"" (81.28 cm), LED\nHD-Ready, 1366x768\n60 Hz...",1366x768,715 x 468 x 80 mm\nWeight: 3.55 kg\nColour: Black,Black
3,sony bravia kd-65x74l 65 inch (165 cm) led 4k tv,sony,bravia kd-65x74l 6,65,74990,led 4k tv,"65"" (165.1 cm), LED\n4K, 3840x2160\n50 Hz Refr...",3840x2160,1463 x 852 x 87 mm\nWeight: 21.4 kg\nColour: B...,Black
4,sony bravia kd-43x74k 43 inch (109 cm) led 4k tv,sony,bravia kd-43x74k 4,43,39990,led 4k tv,"43"" (109.22 cm), LED\n4K, 3840x2160\n60 Hz Ref...",3840x2160,971 x 575 x 77 mm\nWeight: 7.8 kg\nColour: Black,Black


In [None]:
last_five_df

Unnamed: 0,Name,Brand,Model,Inches,Price,DisplayType,Display,Resolution,Design,Colour
0,haier le32k8200gt 32 inch (81 cm) led hd-ready tv,haier,le32k8200gt 3,32,14490,led hd-ready tv,"32"" (81.28 cm), LED\nHD-Ready, 1366x768",1366x768,715.7 x 421.9 x 79.3 mm\nColour: Black,Black
1,haier 55p7gt 55 inch (139 cm) led 4k tv,haier,55p7gt 5,55,42750,led 4k tv,"55"" (139.7 cm), LED\n4K, 3840x2160\n60 Hz Refr...",3840x2160,1225.8 x 709.1 x 86.1 mm\nColour: Grey,Grey
2,samsung ua65mu6470u 65 inch (165 cm) led 4k tv,samsung,ua65mu6470u 6,65,293900,led 4k tv,"65"" (165.1 cm), LED\n4K, 3840x2160",3840x2160,1456.1 x 840.4 x 54.4 mm\nWeight: 23.2 kg\nCol...,Black
3,lg 75un8000ptb 75 inch (190 cm) led 4k tv,lg,75un8000ptb 7,75,119990,led 4k tv,"75"" (190.5 cm), LED\n4K, 3840x2160\n50 Hz Refr...",3840x2160,1693 x 978 x 88.8 mm\nWeight: 34.6 kg\nColour:...,Black
4,lloyd l32hb250b 32 inch (81 cm) led hd-ready tv,lloyd,l32hb250b 3,32,14799,led hd-ready tv,"32"" (81.28 cm), LED\nHD-Ready, 1336x768",1336x768,732 x 436 x 85 mm\nWeight: 4.1 kg\nColour: Black,Black


In [None]:
random_five_df

Unnamed: 0,Name,Brand,Model,Inches,Price,DisplayType,Display,Resolution,Design,Colour
0,samsung ua43du7660k 43 inch (109 cm) led 4k tv,samsung,ua43du7660k 4,43,38990,led 4k tv,"43"" (109.22 cm), LED\n4K, 3840x2160\n50 Hz Ref...",3840x2160,697.5 x 561.4 x 59.7 mm\nWeight: 7.1 kg\nColou...,Black
1,hisense 43a6k 43 inch (109 cm) led 4k tv,hisense,43a6k 4,43,27999,led 4k tv,"43"" (109.22 cm), LED\n4K, 3840x2160\n60 Hz Ref...",3840x2160,Colour: Black,Black
2,innoq 43e-delux 43 inch (109 cm) led full hd tv,innoq,43e-delux 4,43,14990,led full hd tv,"43"" (109.22 cm), LED\nFull HD, 1920x1080\n60 H...",1920x1080,Colour: Black,Black
3,sony bravia xr-65a95k 65 inch (165 cm) oled 4k tv,sony,bravia xr-65a95k 6,65,309990,oled 4k tv,"65"" (165.1 cm), OLED\n4K, 3840x2160\n120 Hz Re...",3840x2160,1444 x 837 x 43 mm\nWeight: 27 kg\nColour: Other,Other
4,croma crel043foa024601 43 inch (109 cm) led fu...,croma,crel043foa024601 4,43,23990,led full hd tv,"43"" (109.22 cm), LED\nFull HD, 1920x1080\n60 H...",1920x1080,957 x 605 x 220 mm\nWeight: 7.9 kg\nColour: Black,Black


In [None]:
# Create a new table without the Display and Design columns
query_create_new_table = """
CREATE TABLE tv_data_new AS
SELECT Name, Brand, Model, Inches, Price, DisplayType, Resolution, Colour
FROM tv_data;
"""
conn.execute(query_create_new_table)

# Drop the old table
conn.execute("DROP TABLE tv_data;")

# Rename the new table to the original table name
conn.execute("ALTER TABLE tv_data_new RENAME TO tv_data;")

# Verify the new table structure
query_verify_table_structure = "PRAGMA table_info(tv_data);"
table_structure_df = pd.read_sql_query(query_verify_table_structure, conn)

table_structure_df


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,Name,TEXT,0,,0
1,1,Brand,TEXT,0,,0
2,2,Model,TEXT,0,,0
3,3,Inches,TEXT,0,,0
4,4,Price,TEXT,0,,0
5,5,DisplayType,TEXT,0,,0
6,6,Resolution,TEXT,0,,0
7,7,Colour,TEXT,0,,0


In [None]:
# Export the updated table to a CSV file
updated_df = pd.read_sql_query("SELECT * FROM tv_data;", conn)
output_file_path = "updated_data_tv.csv"
updated_df.to_csv(output_file_path, index=False)

output_file_path


'updated_data_tv.csv'

In [None]:
# Query to count the number of rows in the dataset
cursor.execute('SELECT COUNT(*) FROM tv_data')
num_rows = cursor.fetchone()[0]
num_rows


1061

In [None]:
# Display the first 5 rows of the DisplayType column
first_5_displaytype = cursor.execute('SELECT DisplayType FROM tv_data LIMIT 5').fetchall()

# Display the last 5 rows of the DisplayType column
last_5_displaytype = cursor.execute('SELECT DisplayType FROM tv_data ORDER BY ROWID DESC LIMIT 5').fetchall()

# Count the occurrences of each unique value in the DisplayType column
displaytype_value_counts = cursor.execute('SELECT DisplayType, COUNT(*) FROM tv_data GROUP BY DisplayType').fetchall()

first_5_displaytype


[('led full hd tv',),
 ('led full hd tv',),
 ('led hd-ready tv',),
 ('led 4k tv',),
 ('led 4k tv',)]

In [None]:
last_5_displaytype

[('led hd-ready tv',),
 ('led 4k tv',),
 ('led 4k tv',),
 ('led 4k tv',),
 ('led hd-ready tv',)]

In [None]:
displaytype_value_counts

[('32 inch (81 cm) led hd-ready tv', 1),
 ('43 inch (109 cm) led 4k tv', 1),
 ('50 inch (127 cm) led 4k tv', 1),
 ('55 inch (139 cm) led 4k tv', 1),
 ('65 inch (165 cm) led 4k tv', 1),
 ('lcd 4k tv', 1),
 ('led 4k tv', 481),
 ('led 8k uhd tv', 2),
 ('led full hd tv', 117),
 ('led hd-ready tv', 186),
 ('mini led 4k tv', 7),
 ('neo qled 4k tv', 26),
 ('neo qled 8k uhd tv', 11),
 ('oled 4k tv', 45),
 ('oled evo 4k tv', 15),
 ('qd-mini led 4k tv', 4),
 ('qled 4k tv', 148),
 ('qled 8k uhd tv', 3),
 ('qled full hd tv', 1),
 ('qled hd-ready tv', 2),
 ('qned 4k tv', 6),
 ('qned 8k uhd tv', 1)]

In [None]:
# SQL query to remove unnecessary information from DisplayType
cursor.execute('''
UPDATE tv_data
SET DisplayType = TRIM(REPLACE(DisplayType, substr(DisplayType, 1, instr(DisplayType, 'inch') + 4), ''))
''')
conn.commit()

# Verify the update by displaying the first few rows of the DisplayType column
cursor.execute('SELECT DisplayType FROM tv_data LIMIT 5')
updated_first_5_displaytype = cursor.fetchall()

updated_first_5_displaytype


[('full hd tv',), ('full hd tv',), ('hd-ready tv',), ('4k tv',), ('4k tv',)]

In [None]:
# SQL query to convert DisplayType values to title case
cursor.execute('''
UPDATE tv_data
SET DisplayType = UPPER(SUBSTR(DisplayType, 1, 1)) || LOWER(SUBSTR(DisplayType, 2, LENGTH(DisplayType) - 1))
''')
conn.commit()

# Verify the update by displaying the first few rows of the DisplayType column
cursor.execute('SELECT DisplayType FROM tv_data LIMIT 5')
title_case_first_5_displaytype = cursor.fetchall()

title_case_first_5_displaytype


[('Full hd tv',), ('Full hd tv',), ('Hd-ready tv',), ('4k tv',), ('4k tv',)]

In [None]:
# Query to fetch all distinct values from the DisplayType column
distinct_values = cursor.execute('SELECT DISTINCT DisplayType FROM tv_data').fetchall()
distinct_values


[('Full hd tv',),
 ('Hd-ready tv',),
 ('4k tv',),
 ('(127 cm) led 4k tv',),
 ('(109 cm) led 4k tv',),
 ('Evo 4k tv',),
 ('Qled 4k tv',),
 ('Qled 8k uhd tv',),
 ('(165 cm) led 4k tv',),
 ('(139 cm) led 4k tv',),
 ('Led 4k tv',),
 ('Ini led 4k tv',),
 ('8k uhd tv',),
 ('(81 cm) led hd-ready tv',)]

In [None]:
# SQL query to remove size information and strip text in the DisplayType column
cursor.execute('''
UPDATE tv_data
SET DisplayType = TRIM(REPLACE(DisplayType, substr(DisplayType, instr(DisplayType, 'cm)') - 5, 9), ''))
WHERE DisplayType LIKE '%cm)%'
''')
conn.commit()

# Verify the update by fetching the distinct values from the DisplayType column again
distinct_values_after_removal = cursor.execute('SELECT DISTINCT DisplayType FROM tv_data').fetchall()
distinct_values_after_removal


[('Full hd tv',),
 ('Hd-ready tv',),
 ('4k tv',),
 ('led 4k tv',),
 ('Evo 4k tv',),
 ('Qled 4k tv',),
 ('Qled 8k uhd tv',),
 ('Led 4k tv',),
 ('Ini led 4k tv',),
 ('8k uhd tv',),
 ('led hd-ready tv',)]

In [None]:
# SQL query to standardize terms in the DisplayType column
cursor.execute('''
UPDATE tv_data
SET DisplayType = CASE
    WHEN DisplayType = 'Full hd tv' THEN 'Full HD TV'
    WHEN DisplayType = 'Hd-ready tv' THEN 'HD-Ready TV'
    WHEN DisplayType = '4k tv' THEN '4K TV'
    WHEN DisplayType = 'led 4k tv' THEN 'LED 4K TV'
    WHEN DisplayType = 'Evo 4k tv' THEN 'EVO 4K TV'
    WHEN DisplayType = 'Qled 4k tv' THEN 'QLED 4K TV'
    WHEN DisplayType = 'Qled 8k uhd tv' THEN 'QLED 8K UHD TV'
    WHEN DisplayType = 'Ini led 4k tv' THEN 'Mini LED 4K TV'
    WHEN DisplayType = '8k uhd tv' THEN '8K UHD TV'
    WHEN DisplayType = 'led hd-ready tv' THEN 'LED HD-Ready TV'
    ELSE DisplayType
END
''')
conn.commit()

# Verify the update by fetching the distinct values from the DisplayType column again
standardized_distinct_values = cursor.execute('SELECT DISTINCT DisplayType FROM tv_data').fetchall()
standardized_distinct_values


[('Full HD TV',),
 ('HD-Ready TV',),
 ('4K TV',),
 ('LED 4K TV',),
 ('EVO 4K TV',),
 ('QLED 4K TV',),
 ('QLED 8K UHD TV',),
 ('Led 4k tv',),
 ('Mini LED 4K TV',),
 ('8K UHD TV',),
 ('LED HD-Ready TV',)]

In [None]:
# SQL query to correct remaining inconsistencies
cursor.execute('''
UPDATE tv_data
SET DisplayType = 'LED 4K TV'
WHERE DisplayType = 'Led 4k tv';
''')
conn.commit()

# Verify the update by fetching the distinct values from the DisplayType column again
final_distinct_values = cursor.execute('SELECT DISTINCT DisplayType FROM tv_data').fetchall()
final_distinct_values


[('Full HD TV',),
 ('HD-Ready TV',),
 ('4K TV',),
 ('LED 4K TV',),
 ('EVO 4K TV',),
 ('QLED 4K TV',),
 ('QLED 8K UHD TV',),
 ('Mini LED 4K TV',),
 ('8K UHD TV',),
 ('LED HD-Ready TV',)]

In [None]:
# Query to get the value counts of the DisplayType column after standardization
cursor.execute('SELECT DisplayType, COUNT(*) FROM tv_data GROUP BY DisplayType ORDER BY COUNT(*) DESC')
displaytype_value_counts_final = cursor.fetchall()

displaytype_value_counts_final


[('4K TV', 681),
 ('HD-Ready TV', 188),
 ('Full HD TV', 118),
 ('QLED 4K TV', 26),
 ('EVO 4K TV', 15),
 ('QLED 8K UHD TV', 11),
 ('LED 4K TV', 11),
 ('8K UHD TV', 6),
 ('Mini LED 4K TV', 4),
 ('LED HD-Ready TV', 1)]

In [None]:
# SQL query to remove "TV" from the DisplayType column
cursor.execute('''
UPDATE tv_data
SET DisplayType = REPLACE(DisplayType, ' TV', '')
''')
conn.commit()

# Verify the update by fetching the distinct values from the DisplayType column again
distinct_values_no_tv = cursor.execute('SELECT DISTINCT DisplayType FROM tv_data').fetchall()
distinct_values_no_tv


[('Full HD',),
 ('HD-Ready',),
 ('4K',),
 ('LED 4K',),
 ('EVO 4K',),
 ('QLED 4K',),
 ('QLED 8K UHD',),
 ('Mini LED 4K',),
 ('8K UHD',),
 ('LED HD-Ready',)]

In [None]:
# Query to get the final value counts of the DisplayType column after removing "TV"
cursor.execute('SELECT DisplayType, COUNT(*) FROM tv_data GROUP BY DisplayType ORDER BY COUNT(*) DESC')
final_value_counts_no_tv = cursor.fetchall()

final_value_counts_no_tv



[('4K', 681),
 ('HD-Ready', 188),
 ('Full HD', 118),
 ('QLED 4K', 26),
 ('EVO 4K', 15),
 ('QLED 8K UHD', 11),
 ('LED 4K', 11),
 ('8K UHD', 6),
 ('Mini LED 4K', 4),
 ('LED HD-Ready', 1)]

In [None]:
# Query to fetch the first 5 rows of the dataset
cursor.execute('SELECT * FROM tv_data LIMIT 5')
first_5_rows = cursor.fetchall()

# Convert the result to a DataFrame for better visualization
columns = [description[0] for description in cursor.description]
first_5_rows_df = pd.DataFrame(first_5_rows, columns=columns)


first_5_rows_df


Unnamed: 0,Name,Brand,Model,Inches,Price,DisplayType,Resolution,Colour
0,oneplus y1s 40 inch (101 cm) led full hd tv,oneplus,y1s 4,40,18990,Full HD,1920x1080,Black
1,tcl 32s5403af 32 inch (81 cm) led full hd tv,tcl,32s5403af 3,32,13990,Full HD,1920x1080,Black
2,tcl 32s5400a 32 inch (81 cm) led hd-ready tv,tcl,32s5400a 3,32,9990,HD-Ready,1366x768,Black
3,sony bravia kd-65x74l 65 inch (165 cm) led 4k tv,sony,bravia kd-65x74l 6,65,74990,4K,3840x2160,Black
4,sony bravia kd-43x74k 43 inch (109 cm) led 4k tv,sony,bravia kd-43x74k 4,43,39990,4K,3840x2160,Black
