In [1]:
# Pandas I/O API is a set of top level reader functions accessed like:
#      pd.read_csv() - returns a Pandas object
# 2 workhorse functions for reading text files 
# (both use the same parsing code to intelligently convert tabular data into a DataFrame object):
#     read_csv()
#     read_table()

In [2]:
# pandas.read_csv(filepath_or_buffer, sep=',', delimiter=None, header='infer',
#                 names=None, index_col=None, usecols=None)

In [3]:
import pandas as pd
import numpy as np

In [5]:
# read csv
df1 = pd.read_csv("D:/Project/Data Science/pandas/dataset/temp_26_io.csv")
print(df1)

   S.No    Name  Age      City  Salary
0     1     Tom   28   Toronto   20000
1     2     Lee   32  HongKong    5000
2     3  Steven   43  Bay Area    8300
3     4     Ram   38    Tokiyo    9000


In [6]:
# custom index --> specifies a column in the csv file to customize the index using index_col
df2 = pd.read_csv("D:/Project/Data Science/pandas/dataset/temp_26_io.csv", index_col=['S.No'])
print(df2)

        Name  Age      City  Salary
S.No                               
1        Tom   28   Toronto   20000
2        Lee   32  HongKong    5000
3     Steven   43  Bay Area    8300
4        Ram   38    Tokiyo    9000


In [14]:
# converts --> dtype of the columns can be passed as a dict
print(df3.dtypes)
print(df3)

df3 = pd.read_csv("D:/Project/Data Science/pandas/dataset/temp_26_io.csv", dtype={'Salary': np.float64})
print("After converting the dtype of Salary to float64:")
print(df3.dtypes)
print(df3)

S.No        int64
Name       object
Age         int64
City       object
Salary    float64
dtype: object
   S.No    Name  Age      City   Salary
0     1     Tom   28   Toronto  20000.0
1     2     Lee   32  HongKong   5000.0
2     3  Steven   43  Bay Area   8300.0
3     4     Ram   38    Tokiyo   9000.0
After converting the dtype of Salary to float64:
S.No        int64
Name       object
Age         int64
City       object
Salary    float64
dtype: object
   S.No    Name  Age      City   Salary
0     1     Tom   28   Toronto  20000.0
1     2     Lee   32  HongKong   5000.0
2     3  Steven   43  Bay Area   8300.0
3     4     Ram   38    Tokiyo   9000.0


In [15]:
# header_names --> specify the names of the header using the names argument
df4 = pd.read_csv("D:/Project/Data Science/pandas/dataset/temp_26_io.csv", names=['a', 'b', 'c', 'd', 'e'])
print(df4)

      a       b    c         d       e
0  S.No    Name  Age      City  Salary
1     1     Tom   28   Toronto   20000
2     2     Lee   32  HongKong    5000
3     3  Steven   43  Bay Area    8300
4     4     Ram   38    Tokiyo    9000


In [19]:
# the header is in a row other than the first, pass the row number to header
df5 = pd.read_csv("D:/Project/Data Science/pandas/dataset/temp_26_io.csv",names=['a','b','c','d','e'],header=0)
print(df5)

   a       b   c         d      e
0  1     Tom  28   Toronto  20000
1  2     Lee  32  HongKong   5000
2  3  Steven  43  Bay Area   8300
3  4     Ram  38    Tokiyo   9000


In [20]:
# skiprows --> skips the number of rows specified
df6 = pd.read_csv("D:/Project/Data Science/pandas/dataset/temp_26_io.csv", skiprows=2)
print(df6)

   2     Lee  32  HongKong  5000
0  3  Steven  43  Bay Area  8300
1  4     Ram  38    Tokiyo  9000
