In [13]:
#
import pandas as pd
import sqlite3

data = {
    "Year": [1880, 1881, 1882, 1883, 1884, 1885, 1886, 1887, 1888, 1889, 1890, 1891, 1892, 1893, 1894, 1895, 1896, 1897, 1898, 1899, 1900, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024],
    "CSIRO Adjusted Sea Level": [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 4.2, 4.3, 4.5, 4.7, 4.9, 5.0, 5.2, 5.4, 5.6, 5.7, 5.9, 6.1, 6.3, 6.5, 6.7, 6.9, 7.1, 7.3, 7.5, 7.8, 8.0, 8.2, 8.5, 8.7, 9.0]
}

df = pd.DataFrame(data)
df.to_csv('epa-sea-level.csv', index=False)
print("CSV file 'epa-sea-level.csv' has been created.")

df = pd.read_csv('epa-sea-level.csv')
print("CSV Data:")
print(df.head())

df.to_csv('output_sea_level.csv', index=False)

conn = sqlite3.connect('sea_level_data.db')
df.to_sql('sea_levels', conn, if_exists='replace', index=False)

data_frame_sql = pd.read_sql_query("SELECT * FROM sea_levels", conn)
print("SQL Data:")
print(data_frame_sql.head())

conn.close()

CSV file 'epa-sea-level.csv' has been created.
CSV Data:
   Year  CSIRO Adjusted Sea Level
0  1880                       0.1
1  1881                       0.1
2  1882                       0.1
3  1883                       0.1
4  1884                       0.1
SQL Data:
   Year  CSIRO Adjusted Sea Level
0  1880                       0.1
1  1881                       0.1
2  1882                       0.1
3  1883                       0.1
4  1884                       0.1


In [25]:
df.to_csv('epa-sea-level.csv', index=False)
print("CSV file 'epa-sea-level.csv' has been created.")

df_csv = pd.read_csv('epa-sea-level.csv')
print("CSV Data:")
print(df_csv.head())

df.to_excel('epa-sea-level.xlsx', index=False)
print("Excel file 'epa-sea-level.xlsx' has been created.")

df_excel = pd.read_excel('epa-sea-level.xlsx')
print("Excel Data:")
print(df_excel.head())

df.to_json('epa-sea-level.json', orient='records', lines=True)
print("JSON file 'epa-sea-level.json' has been created.")

df_json = pd.read_json('epa-sea-level.json', orient='records', lines=True)
print("JSON Data:")
print(df_json.head())

conn = sqlite3.connect('sea_level_data.db')

df.to_sql('sea_levels', conn, if_exists='replace', index=False)
print("Data has been written to SQL database.")

df_sql = pd.read_sql_query("SELECT * FROM sea_levels", conn)
print("SQL Data:")
print(df_sql.head())

conn.close()

CSV file 'epa-sea-level.csv' has been created.
CSV Data:
   Year  CSIRO Adjusted Sea Level
0  1880                       0.1
1  1881                       0.1
2  1882                       0.1
3  1883                       0.1
4  1884                       0.1
Excel file 'epa-sea-level.xlsx' has been created.
Excel Data:
   Year  CSIRO Adjusted Sea Level
0  1880                       0.1
1  1881                       0.1
2  1882                       0.1
3  1883                       0.1
4  1884                       0.1
JSON file 'epa-sea-level.json' has been created.
JSON Data:
   Year  CSIRO Adjusted Sea Level
0  1880                       0.1
1  1881                       0.1
2  1882                       0.1
3  1883                       0.1
4  1884                       0.1
Data has been written to SQL database.
SQL Data:
   Year  CSIRO Adjusted Sea Level
0  1880                       0.1
1  1881                       0.1
2  1882                       0.1
3  1883                 

In [15]:
#
import pandas as pd

df = pd.read_csv('epa-sea-level.csv')

print(df.head())
print(df.isnull().sum())
print(df.dtypes)
print(df.describe())
print(df.columns)

if 'UnwantedColumn' in df.columns:
    df.drop(columns=['UnwantedColumn'], inplace=True)

df_filtered = df[df['Year'] > 2000]

if 'SomeColumn' in df.columns:
    df.dropna(subset=['SomeColumn'], inplace=True)

if 'SomeColumn' in df.columns:
    df['SomeColumn'].fillna(value=0, inplace=True)

df.fillna(method='ffill', inplace=True)

if 'SomeColumn' in df.columns:
    df.dropna(subset=['SomeColumn'], inplace=True)

df.interpolate(method='linear', inplace=True)

df['CSIRO Adjusted Sea Level'].fillna(df['CSIRO Adjusted Sea Level'].median(), inplace=True)

df.rename(columns={'CSIRO Adjusted Sea Level': 'Sea_Level_in_meters'}, inplace=True)

df.dropna(subset=['Year'], inplace=True)

print(df_filtered.head())

df.to_csv('modified_epa_sea_level.csv', index=False)


   Year  CSIRO Adjusted Sea Level
0  1880                       0.1
1  1881                       0.1
2  1882                       0.1
3  1883                       0.1
4  1884                       0.1
Year                        0
CSIRO Adjusted Sea Level    0
dtype: int64
Year                          int64
CSIRO Adjusted Sea Level    float64
dtype: object
              Year  CSIRO Adjusted Sea Level
count    46.000000                 46.000000
mean   1956.304348                  3.523913
std      61.813652                  3.346453
min    1880.000000                  0.100000
25%    1891.250000                  0.100000
50%    2001.500000                  4.400000
75%    2012.750000                  6.450000
max    2024.000000                  9.000000
Index(['Year', 'CSIRO Adjusted Sea Level'], dtype='object')
    Year  CSIRO Adjusted Sea Level
22  2001                       4.3
23  2002                       4.5
24  2003                       4.7
25  2004                       4

  df.fillna(method='ffill', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['CSIRO Adjusted Sea Level'].fillna(df['CSIRO Adjusted Sea Level'].median(), inplace=True)
