### <center>  PANDAS TUTORIAL </center>

    This tutorial covers the following aspects of pandas:

    1. Loading data using read_csv
    2. Plotting with matplotlib and seaborn
    3. Concatenating & Merging
    4. Handling Missing Data
    5. Tidying Data
    6. Grouping & Pivoting Data
    7. Vectorized Computations
    8. Method Chaining
    9. Manipulating Datetime columns
    10. 
    11. 
    
    Modeling

In [1]:
# Importing libraries
import os
import numpy as np
import pandas as pd
from pandas_datareader import data as wb
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
# Changing the directory path
os.chdir("/Users/pavitragajanana/development/2. Data Files/")
os.getcwd()
%ls

Alternative Fuel Stations.csv
Alternative Fuel Vehicles.csv
BorderCrossingEntryData.csv
Credit_Ratings_Fitch.csv
CrudeOil_Annual_Cushing.csv
CrudeOil_Daily_Cushing_OK_WTI_Spot_Price_FOB.csv
CrudeOil_Daily_Europe_Brent_Spot_Price_FOB.csv
CrudeOil_Monthly_Cushing_OK_WTI_Spot_Price_FOB.csv
CrudeOil_Monthly_Dubai Crude_USD.csv
CrudeOil_Monthly_Europe_Brent_Spot_Price_FOB.csv
Fitch-ratings-history.csv
FossilFuelEnergyConsumption.csv
LightDutyVehicles.csv
PeriodicTable.csv
TempVehicles.csv
US-States.csv
USDINRExchangeRate.csv*
airline_delay.csv
all-vehicles-model.csv
bank-data.csv
banklist.csv
euro-exchange-rates.csv
ntsb-aviation-accident-dataset.csv
tao-all2.dat.gz
vehicles.csv


 > ### Loading data using read_csv

In [3]:
# Load the airline delay data with read_csv.
#
# The call spells out the read_csv options one by one as a reference. Apart
# from na_values, keep_default_na and low_memory, every value passed below is
# the pandas default.
airline_delay = pd.read_csv(
    "airline_delay.csv",

    # --- Layout of the file ---
    sep=',',  # Field separator; only needs to be given when it is not ','
    delimiter=None,  # Alias for sep
    header='infer',  # Row holding the column names; 'infer' uses the first row unless names is passed
    names=None,  # List of column names to use
    index_col=None,  # Column(s) to set as the row index
    usecols=None,  # List of columns to be read

    # --- Types and conversions ---
    dtype=None,  # Data type for the whole frame, or a dict mapping column -> dtype
    engine=None,  # Parsing engine to use, e.g. 'c' or 'python'
    converters=None,  # Dict of column -> function used to convert the values of that column
    true_values=None,  # List of values to interpret as True
    false_values=None,  # List of values to interpret as False

    # --- Which rows to read ---
    skipinitialspace=False,  # When True, skips the spaces that directly follow a delimiter
    skiprows=None,  # An integer skips the first n rows; a list skips the listed row numbers
    skipfooter=0,  # Skips the last n rows
    nrows=None,  # Reads only the first n rows

    # --- Missing values ---
    # The default NaN markers are '#N/A', '#N/A N/A', '#NA', '-1.#IND',
    # '-1.#QNAN', '-NaN', '-nan', '1.#IND', '1.#QNAN', 'N/A', 'NA', 'NULL',
    # 'NaN', 'n/a', 'nan', 'null'.
    # With keep_default_na=True the strings in na_values are used in addition
    # to those defaults; with keep_default_na=False (as here) ONLY the strings
    # in na_values are treated as NaN, i.e. just the empty string.
    na_values=[""],
    keep_default_na=False,
    na_filter=True,  # For large data known to have no missing values, False speeds up parsing

    # --- Blank lines and dates ---
    skip_blank_lines=True,  # When True, blank lines are skipped instead of being read as rows of NaN
    parse_dates=False,  # [1, 2, 3] parses each column as datetime; [[1, 2, 3]] combines them into one (deprecated in newer pandas)
    dayfirst=False,  # Set to True when dates are written day-first (DD/MM)
    cache_dates=True,  # Caches unique converted dates to speed up datetime conversion

    # --- Reading in pieces / compressed input ---
    iterator=False,  # True returns a file reader; reader.get_chunk(n) then retrieves n rows at a time
    chunksize=None,  # Number of rows per chunk; when set, a file reader is returned for iteration
    compression='infer',  # Reads compressed files; 'infer' picks the codec from the file extension

    # --- Number formatting ---
    thousands=None,  # Thousands separator character
    decimal='.',  # Decimal point character; must be a one-character str (not bytes)
    lineterminator=None,  # Character that ends a line (C engine only)

    # --- Quoting, comments and encoding ---
    quotechar='"',  # Character that starts and ends a quoted field
    quoting=0,  # Quoting behaviour; 0 is csv.QUOTE_MINIMAL
    doublequote=True,  # Two consecutive quotechars inside a field are read as one
    escapechar=None,  # Character used to escape other characters
    comment=None,  # Character that marks the rest of a line as a comment
    encoding=None,  # Text encoding of the file, e.g. 'utf-8'
    dialect=None,  # csv.Dialect whose settings override the related options above

    # --- Memory and precision ---
    low_memory=False,  # False reads the file in one pass, avoiding mixed-type inference from chunked parsing
    memory_map=False,  # True maps the file directly onto memory when a path is given
    float_precision=None,  # Which converter the C engine uses for floating-point values

    # Options from older pandas releases that were removed in pandas 2.0 and
    # raise TypeError there. They were all at their default values, so leaving
    # them out does not change the result:
    #   squeeze=False          -> call .squeeze("columns") on the result to get a Series
    #   prefix=None            -> prefix for auto-numbered column names when there is no header
    #   mangle_dupe_cols=True  -> duplicate column names are always renamed X, X.1, X.2, ...
    #   error_bad_lines=True,
    #   warn_bad_lines=True    -> replaced by on_bad_lines ('error', 'warn' or 'skip')
    #
    # Options deprecated in pandas 2.x, likewise left at their defaults by
    # omitting them:
    #   infer_datetime_format, date_parser (use date_format), keep_date_col,
    #   delim_whitespace (use sep=r'\s+'), verbose
)

airline_delay.dtypes

year                     int64
 month                   int64
carrier                 object
carrier_name            object
airport                 object
airport_name            object
arr_flights            float64
arr_del15              float64
carrier_ct             float64
 weather_ct            float64
nas_ct                 float64
security_ct            float64
late_aircraft_ct       float64
arr_cancelled          float64
arr_diverted           float64
 arr_delay             float64
 carrier_delay         float64
weather_delay          float64
nas_delay              float64
security_delay         float64
late_aircraft_delay    float64
Unnamed: 21            float64
dtype: object