In [None]:
import pandas as pd

# Location of the source workbook.
# NOTE(review): "/content/..." looks like a Colab upload path - adjust when running elsewhere.
file_path = "/content/CO2 EMISSIONS_COUNTRY 1970-2023.xlsx"

# Read the first worksheet of the workbook into a DataFrame.
df = pd.read_excel(io=file_path, sheet_name=0)

In [None]:
# Preview the untouched data: a heading line followed by the first five rows.
print(" Original Data (First 5 rows):", df.head(n=5), sep="\n")


 Original Data (First 5 rows):
      IPCC_annex Country_code_A3           Name Substance         Y_1970  \
0    Non-Annex_I             ABW          Aruba       CO2      25.213789   
1    Non-Annex_I             AFG    Afghanistan       CO2    1733.920270   
2    Non-Annex_I             AGO         Angola       CO2    8933.899038   
3    Non-Annex_I             AIA       Anguilla       CO2       2.177587   
4  Int. Aviation             AIR  Int. Aviation       CO2  169900.398512   

          Y_1971         Y_1972         Y_1973         Y_1974         Y_1975  \
0      28.827752      39.472108      44.289439      43.469148      57.396273   
1    1733.709906    1693.584478    1733.904787    2190.318066    2028.966850   
2    8519.512895   10366.104268   11346.995664   11806.560821   10904.652964   
3       2.177689       2.273190       2.118480       2.359836       2.593654   
4  169900.398512  179759.531088  187494.406315  180478.128954  174582.470528   

   ...         Y_2014         Y

In [None]:
# Identify & Remove Duplicate Rows
# A row counts as a duplicate when every column matches an earlier row.
duplicate_count = df.duplicated(keep="first").sum()
print(f"\n Number of duplicate rows before dropping: {duplicate_count}")

# Keep the first occurrence of each row and discard the repeats.
df = df.drop_duplicates(keep="first")

# Re-check so the printed figure reflects the frame after dropping.
print(f" Number of duplicate rows after dropping: {df.duplicated().sum()}")


 Number of duplicate rows before dropping: 0
 Number of duplicate rows after dropping: 0


In [None]:
# Handle Missing Values
# Count null cells over the whole frame (per-column totals, then the grand total).
missing_before = df.isna().sum().sum()
print(f"\n Total missing values before filling: {missing_before}")

# Replace every null cell, in every column, with 0.
df = df.fillna(value=0)

# Recount to confirm that no null cells remain.
missing_after = df.isna().sum().sum()
print(f" Total missing values after filling: {missing_after}")


 Total missing values before filling: 0
 Total missing values after filling: 0


In [None]:
# Strip the "Y_" prefix from the year columns (e.g. "Y_1970" -> "1970");
# columns without that prefix are absent from the mapping and keep their names.
df = df.rename(
    columns={label: label[2:] for label in df.columns if label.startswith("Y_")}
)

In [None]:
# Show the first five rows to check the renamed year columns.
print(df.head(n=5))

      IPCC_annex Country_code_A3           Name Substance           1970  \
0    Non-Annex_I             ABW          Aruba       CO2      25.213789   
1    Non-Annex_I             AFG    Afghanistan       CO2    1733.920270   
2    Non-Annex_I             AGO         Angola       CO2    8933.899038   
3    Non-Annex_I             AIA       Anguilla       CO2       2.177587   
4  Int. Aviation             AIR  Int. Aviation       CO2  169900.398512   

            1971           1972           1973           1974           1975  \
0      28.827752      39.472108      44.289439      43.469148      57.396273   
1    1733.709906    1693.584478    1733.904787    2190.318066    2028.966850   
2    8519.512895   10366.104268   11346.995664   11806.560821   10904.652964   
3       2.177689       2.273190       2.118480       2.359836       2.593654   
4  169900.398512  179759.531088  187494.406315  180478.128954  174582.470528   

   ...           2014           2015           2016           

In [None]:
# Show the dtypes as they are now so the effect of the coercion below is visible.
print("\n Column data types before conversion:")
print(df.dtypes)

# Best-effort coercion: try to turn every object-typed column into a numeric
# one, and leave the column untouched when its values cannot be converted.
for col in df.columns:
    if df[col].dtype != 'object':
        # Already a non-object dtype; nothing to convert.
        continue
    try:
        df[col] = pd.to_numeric(df[col])
    except (ValueError, TypeError):
        # pd.to_numeric raises ValueError for strings it cannot parse and
        # TypeError for objects it cannot interpret as numbers; either way the
        # column is kept as is (the assignment above never happened).
        continue

print("\n Column data types after conversion:")
print(df.dtypes)


 Column data types before conversion:
IPCC_annex          object
Country_code_A3     object
Name                object
Substance           object
1970               float64
1971               float64
1972               float64
1973               float64
1974               float64
1975               float64
1976               float64
1977               float64
1978               float64
1979               float64
1980               float64
1981               float64
1982               float64
1983               float64
1984               float64
1985               float64
1986               float64
1987               float64
1988               float64
1989               float64
1990               float64
1991               float64
1992               float64
1993               float64
1994               float64
1995               float64
1996               float64
1997               float64
1998               float64
1999               float64
2000               float64
2001            

In [None]:
# Preview the frame after the cleaning steps: heading line, then the first five rows.
print("\n Processed Data (First 5 rows):", df.head(n=5), sep="\n")



 Processed Data (First 5 rows):
      IPCC_annex Country_code_A3           Name Substance           1970  \
0    Non-Annex_I             ABW          Aruba       CO2      25.213789   
1    Non-Annex_I             AFG    Afghanistan       CO2    1733.920270   
2    Non-Annex_I             AGO         Angola       CO2    8933.899038   
3    Non-Annex_I             AIA       Anguilla       CO2       2.177587   
4  Int. Aviation             AIR  Int. Aviation       CO2  169900.398512   

            1971           1972           1973           1974           1975  \
0      28.827752      39.472108      44.289439      43.469148      57.396273   
1    1733.709906    1693.584478    1733.904787    2190.318066    2028.966850   
2    8519.512895   10366.104268   11346.995664   11806.560821   10904.652964   
3       2.177689       2.273190       2.118480       2.359836       2.593654   
4  169900.398512  179759.531088  187494.406315  180478.128954  174582.470528   

   ...           2014        

In [None]:
import sqlite3
import pandas as pd

In [None]:
# Open a connection to the SQLite database file "co2_emissions.db".
# The connection is kept open because the cells below query through it.
conn = sqlite3.connect("co2_emissions.db")

# Write the DataFrame to the "emissions" table:
#   if_exists="replace" -> an existing table of that name is replaced
#   index=False         -> the DataFrame index is not stored as a column
df.to_sql(
    name="emissions",
    con=conn,
    if_exists="replace",
    index=False,
)

print("Data successfully inserted into SQLite database.")

Data successfully inserted into SQLite database.


In [None]:
# Ask SQLite's catalog table (sqlite_master) for the names of all tables.
query = "SELECT name FROM sqlite_master WHERE type='table';"
tables = pd.read_sql(sql=query, con=conn)

# Print a heading followed by the resulting one-column frame.
print("List of Tables in the Database:", tables, sep="\n")


List of Tables in the Database:
        name
0  emissions


In [None]:
# Select the "2023" column (aliased as CO2_Emissions) for three named countries.
# The column name is double-quoted so SQLite reads it as an identifier.
query = """
SELECT Name, "2023" AS CO2_Emissions
FROM emissions
WHERE Name IN ('India','China','United States');
"""

# Run the query over the open connection and collect the rows in a DataFrame.
result = pd.read_sql(sql=query, con=conn)

# Display the result under a short heading.
print(" CO₂ Emissions:", result, sep="\n")

 CO₂ Emissions:
            Name  CO2_Emissions
0          China   1.325964e+07
1          India   2.955182e+06
2  United States   4.682039e+06
