# working with CSV files

In [1]:
import pandas as pd
df = pd.read_csv('Students.csv')
df

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...,...
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0


# nrows:
The nrows parameter is used to specify the number of rows to read from a file. It is often used to read only a subset of rows from a large dataset

In [3]:
df1 = pd.read_csv('Students.csv',nrows=1)
df1

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0


# usecols:
The usecols parameter is used to specify which columns to read from the file. It allows you to read only specific columns instead of the entire dataset

In [5]:
df2 = pd.read_csv('Students.csv',usecols=[0,3])
df2

Unnamed: 0,Name,Position
0,Avery Bradley,PG
1,Jae Crowder,SF
2,John Holland,SG
3,R.J. Hunter,SG
4,Jonas Jerebko,PF
...,...,...
453,Shelvin Mack,PG
454,Raul Neto,PG
455,Tibor Pleiss,C
456,Jeff Withey,C


# skiprows:
The skiprows parameter is used to specify the number of rows to skip from the beginning of the file while reading. It's often used when the file has a header or metadata that needs to be skipped

In [6]:
df3 = pd.read_csv('Students.csv',skiprows=0)
df3

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...,...
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0


# index_col:
The index_col parameter is used to specify which column should be used as the index while reading the data. It sets the index of the DataFrame based on the specified column

In [8]:
df4 = pd.read_csv('Students.csv',index_col=[0])
df4

Unnamed: 0_level_0,Team,Number,Position,Age,Height,Weight,College,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...
Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0


# header:
The header parameter is used to specify which row should be considered as the header (column names) of the DataFrame. It can take values like None (no header), an integer (row number as header), or a list of integers (multiple rows as header)

In [11]:
df5 = pd.read_csv('Students.csv',header=None)
df5

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
1,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
2,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
3,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
4,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
...,...,...,...,...,...,...,...,...,...
454,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
455,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
456,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
457,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0


# names:
The names parameter is used to explicitly provide column names when the file doesn't have a header. You provide a list of strings that will be used as column names

In [15]:
df6 = pd.read_csv('Students.csv',names=['col0','col1','col2','col3','col4','col5','col6','col7'])
df6

Unnamed: 0,col0,col1,col2,col3,col4,col5,col6,col7
Name,Team,Number,Position,Age,Height,Weight,College,Salary
Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
...,...,...,...,...,...,...,...,...
Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0
