In [None]:
# In this notebook we will study file handling in Python.
# First we will study how to handle a CSV file using the CSV module
# Method 1: Using csv.reader (reads rows as lists)
#
# The input file Auto.csv has a header and 397 data points arranged row-wise
# with 9 features/descriptors explained as follows:
#
# mpg: miles per gallon
# cylinders: Number of cylinders between 4 and 8
# displacement: Engine displacement (cu. inches)
# horsepower: Engine horsepower
# weight: Vehicle weight (lbs.)
# acceleration: Time to accelerate from 0 to 60 mph (sec.)
# year: Model year (modulo 100)
# origin: Origin of car (1. American, 2. European, 3. Japanese)
# name: Vehicle name
#
import csv
filename = "Auto.csv"  # Replace with your CSV file name

# newline='' is the mode the csv module documentation recommends for files
# that are handed to csv.reader.
with open(filename, mode='r', newline='') as handle:
    csv_reader = csv.reader(handle)

    # The file starts with a header row: take it off the iterator first so
    # that the loop below only sees data rows.
    header = next(csv_reader)
    print(f"Header: {header}")

    # Each remaining row comes back as a list of strings.
    for row in csv_reader:
        print(row)

In [None]:
#Now we read the same file as dictionaries instead of lists
import csv
filename = "Auto.csv"  # Replace with your CSV file name

with open(filename, mode='r', newline='') as handle:
    # DictReader takes the field names from the first line of the file, so no
    # explicit header handling is needed here.
    csvreader = csv.DictReader(handle)

    # Each row is a dictionary whose keys are the header names.
    for row in csvreader:
        print(row)
 

In [None]:
#Let us now use Pandas library for reading the .csv file and for subsequent data manipulation
#
import pandas as pd
import numpy as np
# Parse the CSV into a DataFrame; the first line of the file supplies the
# column names.
Auto = pd.read_csv("Auto.csv")
# A bare name on the last line of a cell makes the notebook display it.
Auto

In [None]:
import pandas as pd
import numpy as np
# Load the data set, then pick out one column by its header name.
Auto = pd.read_csv("Auto.csv")
Auto["horsepower"]


In [None]:
# Using np.unique function, we can eliminate the repeated values
#
import pandas as pd
import numpy as np
Auto = pd.read_csv("Auto.csv")
# np.unique returns the sorted distinct values found in the column.
np.unique(Auto["horsepower"])

In [None]:
#We see that '?' is being used to encode some missing values. Therefore, we tell pandas
#(via na_values) to read '?' as NaN (not a number), just as it does for an empty field
#(e.g. nothing between two commas)
# Import pandas here as well: every other cell imports it, and without this
# line the cell only works if an earlier cell has already been run.
import pandas as pd
import numpy as np
# Read the file again, this time treating every '?' entry as a missing value (NaN).
Auto = pd.read_csv('Auto.csv', na_values=['?'])
# Sum the 'horsepower' column of the re-read data.
Auto['horsepower'].sum()

In [None]:
import pandas as pd
import numpy as np
# Re-read the file with '?' mapped to NaN and report (rows, columns).
Auto = pd.read_csv("Auto.csv", na_values=["?"])
Auto.shape

In [None]:
# Now we create a new DataFrame, Auto_new, by removing all the rows that contain a missing value (originally encoded as ?)
import pandas as pd
import numpy as np
Auto = pd.read_csv("Auto.csv", na_values=["?"])
# dropna() returns a new DataFrame without the rows that contain a NaN;
# Auto itself is left unchanged.
Auto_new = Auto.dropna()
# (rows, columns) of the cleaned data.
Auto_new.shape

In [None]:
# We write down the column headers for the corrected data
#
import pandas as pd
import numpy as np
# Read with '?' as NaN and drop the incomplete rows in one chained expression.
Auto_new = pd.read_csv("Auto.csv", na_values=["?"]).dropna()
# From here on, Auto refers to the cleaned data.
Auto = Auto_new
# Column headers of the cleaned data.
Auto.columns

In [None]:
# Let us write out the first three rows of the new data 
#
import pandas as pd
import numpy as np
# Load the data with '?' treated as missing and discard the incomplete rows.
Auto = pd.read_csv('Auto.csv', na_values=['?'])
Auto_new = Auto.dropna()
Auto = Auto_new
# Only the last expression of a cell is displayed, so the stray `Auto.columns`
# statement that used to sit here had no visible effect and has been removed.
# Show the first three rows of the cleaned data.
Auto.head(3)

In [None]:
#Here we print out those rows in which the car model was more recent
#than 1980
import pandas as pd
import numpy as np
Auto_new = pd.read_csv("Auto.csv", na_values=["?"]).dropna()
Auto = Auto_new
# Boolean mask: True for the rows whose 'year' value is greater than 80.
idx_80 = Auto["year"].gt(80)
# Keep only the rows selected by the mask.
Auto.loc[idx_80]

In [None]:
#We can also print out the data of a few selected columns,
#here 'mpg' and 'horsepower'
import pandas as pd
import numpy as np
Auto_new = pd.read_csv("Auto.csv", na_values=["?"]).dropna()
Auto = Auto_new
# Passing a list of column names selects those columns (all rows are kept).
Auto.loc[:, ["mpg", "horsepower"]]

In [None]:
#There are a huge number of functions in Pandas which are useful for data manipulation
#I urge you to explore those on your own.
#Here we learn how to write the data in a csv file using Pandas
# The data has to be held in a dataframe (pd.read_csv already returns one); we then
# write that data out using the 'to_csv' method
import pandas as pd
import numpy as np
# Re-create the cleaned data: '?' is read as NaN and incomplete rows are dropped.
Auto = pd.read_csv('Auto.csv', na_values=['?'])
Auto_new = Auto.dropna()
Auto = Auto_new
# pd.read_csv already returns a DataFrame; the explicit constructor call is kept
# only so that the name `df` stays available to later cells.
df = pd.DataFrame(Auto)
# Write the cleaned data to Auto_new.csv. index=False leaves out the row labels:
# by default to_csv would add them as an extra, unnamed first column, so the
# output would no longer have the same columns as Auto.csv.
df.to_csv('Auto_new.csv', index=False)