#### [pandas.pydata.org](https://pandas.pydata.org/pandas-docs/stable/index.html)

In [1]:
import pandas as pd
import numpy as np

## CSV : Comma-Separated Values

### to_csv 

`to_csv(path_or_buf, sep, columns, header, index, index_label)‍‍`

In [2]:
mydata = {'name' : ['Ali', 'Sara', 'Taha', 'Omid', 'Negar'], 
          'age'  : [27, 24, 25, 26, 21], 
          'score': [19, 18, 20, 13, 17]}

In [3]:
mydf = pd.DataFrame(mydata).set_index('name'); mydf

Unnamed: 0_level_0,age,score
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Ali,27,19
Sara,24,18
Taha,25,20
Omid,26,13
Negar,21,17


In [4]:
path = 'd:Files/to_test.csv'

path_or_buf: 'FilePath'

In [5]:
mydf.to_csv(path)
pd.read_csv(path)

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


sep: 'str'

In [6]:
mydf.to_csv(path)
pd.read_csv(path)

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


In [7]:
mydf.to_csv(path, sep='|')
pd.read_csv(path)

Unnamed: 0,name|age|score
0,Ali|27|19
1,Sara|24|18
2,Taha|25|20
3,Omid|26|13
4,Negar|21|17


In [8]:
mydf.to_csv(path, sep='.')
pd.read_csv(path)

Unnamed: 0,name.age.score
0,Ali.27.19
1,Sara.24.18
2,Taha.25.20
3,Omid.26.13
4,Negar.21.17


columns: 'Sequence[Hashable] | None'

In [9]:
mydf.to_csv(path)
pd.read_csv(path)

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


In [10]:
mydf.to_csv(path, columns=['age'])
pd.read_csv(path)

Unnamed: 0,name,age
0,Ali,27
1,Sara,24
2,Taha,25
3,Omid,26
4,Negar,21


In [11]:
mydf.to_csv(path, columns=['score', 'age'])
pd.read_csv(path)

Unnamed: 0,name,score,age
0,Ali,19,27
1,Sara,18,24
2,Taha,20,25
3,Omid,13,26
4,Negar,17,21


In [12]:
mydf.to_csv(path, columns=['score', 'age', 'score'])
pd.read_csv(path)

Unnamed: 0,name,score,age,score.1
0,Ali,19,27,19
1,Sara,18,24,18
2,Taha,20,25,20
3,Omid,13,26,13
4,Negar,17,21,17


header: 'bool_t | list[str]'

In [13]:
mydf.to_csv(path)
pd.read_csv(path)

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


In [14]:
mydf.to_csv(path, header=False)
pd.read_csv(path)

Unnamed: 0,Ali,27,19
0,Sara,24,18
1,Taha,25,20
2,Omid,26,13
3,Negar,21,17


In [15]:
mydf.to_csv(path, header=None)
pd.read_csv(path)

Unnamed: 0,Ali,27,19
0,Sara,24,18
1,Taha,25,20
2,Omid,26,13
3,Negar,21,17


In [16]:
mydf.to_csv(path, header=0)
pd.read_csv(path)

Unnamed: 0,Ali,27,19
0,Sara,24,18
1,Taha,25,20
2,Omid,26,13
3,Negar,21,17


In [17]:
mydf.to_csv(path, header=True)
pd.read_csv(path)

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


In [18]:
mydf.to_csv(path, header=1)
pd.read_csv(path)

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


In [19]:
mydf.to_csv(path, header=['a', 's'])
pd.read_csv(path)

Unnamed: 0,name,a,s
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


index: 'bool_t'

In [20]:
mydf.to_csv(path)
pd.read_csv(path)

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


In [21]:
mydf.to_csv(path, index=False)
pd.read_csv(path)

Unnamed: 0,age,score
0,27,19
1,24,18
2,25,20
3,26,13
4,21,17


In [22]:
mydf.to_csv(path, index=None)
pd.read_csv(path)

Unnamed: 0,age,score
0,27,19
1,24,18
2,25,20
3,26,13
4,21,17


In [23]:
mydf.to_csv(path, index=0)
pd.read_csv(path)

Unnamed: 0,age,score
0,27,19
1,24,18
2,25,20
3,26,13
4,21,17


In [24]:
mydf.to_csv(path, index=True)
pd.read_csv(path)

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


In [25]:
mydf.to_csv(path, index=1)
pd.read_csv(path)

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


index_label: 'IndexLabel | None'

In [26]:
mydf.to_csv(path)
pd.read_csv(path)

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


In [27]:
mydf.to_csv(path, index_label='esm')
pd.read_csv(path)

Unnamed: 0,esm,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


**index of data**

In [28]:
mydata = {'name' : ['Ali', 'Sara', 'Taha', 'Omid'], 
          'age'  : [27, 24, 25, 26], 
          'Score': [19, 18, 20, 13]}

In [29]:
mydf = pd.DataFrame(mydata); mydf

Unnamed: 0,name,age,Score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13


In [30]:
mydf.to_csv('d:Files/score1.csv', index=False)

In [31]:
pd.read_csv('d:Files/score1.csv')

Unnamed: 0,name,age,Score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13


In [32]:
#---------------

In [33]:
mydf.to_csv('d:Files/score2.csv')

In [34]:
pd.read_csv('d:Files/score2.csv')

Unnamed: 0.1,Unnamed: 0,name,age,Score
0,0,Ali,27,19
1,1,Sara,24,18
2,2,Taha,25,20
3,3,Omid,26,13


In [35]:
pd.read_csv('d:Files/score2.csv', usecols=[1, 2, 3])

Unnamed: 0,name,age,Score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13


In [36]:
#---------------

In [37]:
mydf2 = pd.DataFrame(mydata).set_index('name'); mydf

Unnamed: 0,name,age,Score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13


In [38]:
mydf2.to_csv('d:Files/score3.csv')

In [39]:
pd.read_csv('d:Files/score3.csv')

Unnamed: 0,name,age,Score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13


### read_csv 

`read_csv(filepath_or_buffer, header, names, index_col, usecols, skiprows, skipfooter, nrows, parse_dates, chunksize)`

In [40]:
mydata = {'name' : ['Ali', 'Sara', 'Taha', 'Omid', 'Negar'], 
          'age'  : [27, 24, 25, 26, 21], 
          'score': [19, 18, 20, 13, 17]}

In [41]:
mydf = pd.DataFrame(mydata).set_index('name'); mydf

Unnamed: 0_level_0,age,score
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Ali,27,19
Sara,24,18
Taha,25,20
Omid,26,13
Negar,21,17


In [42]:
path = 'd:Files/read_test.csv'

In [43]:
mydf.to_csv(path)

filepath_or_buffer: 'FilePath'

In [44]:
pd.read_csv(path)

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


In [45]:
with open(path) as f:
    x = pd.read_csv(f)
x   

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


sep: 'str | None'

In [46]:
path = 'd:Files/read_test.csv'

In [47]:
pd.read_csv(path)

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


In [48]:
pd.read_csv(path, sep='|')

Unnamed: 0,"name,age,score"
0,"Ali,27,19"
1,"Sara,24,18"
2,"Taha,25,20"
3,"Omid,26,13"
4,"Negar,21,17"


header: 'int | Sequence[int] | None'

In [49]:
path = 'd:Files/read_test.csv'

In [50]:
pd.read_csv(path)

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


In [51]:
pd.read_csv(path, header=None)

Unnamed: 0,0,1,2
0,name,age,score
1,Ali,27,19
2,Sara,24,18
3,Taha,25,20
4,Omid,26,13
5,Negar,21,17


In [52]:
pd.read_csv(path, header=0)

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


In [53]:
pd.read_csv(path, header=3)

Unnamed: 0,Taha,25,20
0,Omid,26,13
1,Negar,21,17


In [54]:
pd.read_csv(path, header=[0, 2])

Unnamed: 0_level_0,name,age,score
Unnamed: 0_level_1,Sara,24,18
0,Taha,25,20
1,Omid,26,13
2,Negar,21,17


names: 'Sequence[Hashable] | None'

In [55]:
path = 'd:Files/read_test.csv'

In [56]:
pd.read_csv(path)

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


In [57]:
pd.read_csv(path, names=['nam', 'sen', 'nomre'])

Unnamed: 0,nam,sen,nomre
0,name,age,score
1,Ali,27,19
2,Sara,24,18
3,Taha,25,20
4,Omid,26,13
5,Negar,21,17


In [58]:
pd.read_csv(path, header=0, names=['nam', 'sen', 'nomre'])

Unnamed: 0,nam,sen,nomre
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


index_col: 'IndexLabel | Literal[False] | None'

In [59]:
path = 'd:Files/read_test.csv'

In [60]:
pd.read_csv(path)

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


In [61]:
pd.read_csv(path, index_col=None)

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


In [62]:
pd.read_csv(path, index_col=0)

Unnamed: 0_level_0,age,score
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Ali,27,19
Sara,24,18
Taha,25,20
Omid,26,13
Negar,21,17


In [63]:
pd.read_csv(path, index_col=2)

Unnamed: 0_level_0,name,age
score,Unnamed: 1_level_1,Unnamed: 2_level_1
19,Ali,27
18,Sara,24
20,Taha,25
13,Omid,26
17,Negar,21


In [64]:
pd.read_csv(path, index_col=[0, 1])

Unnamed: 0_level_0,Unnamed: 1_level_0,score
name,age,Unnamed: 2_level_1
Ali,27,19
Sara,24,18
Taha,25,20
Omid,26,13
Negar,21,17


In [65]:
pd.read_csv(path, index_col=['name'])

Unnamed: 0_level_0,age,score
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Ali,27,19
Sara,24,18
Taha,25,20
Omid,26,13
Negar,21,17


In [66]:
pd.read_csv(path, index_col=['name', 'age'])

Unnamed: 0_level_0,Unnamed: 1_level_0,score
name,age,Unnamed: 2_level_1
Ali,27,19
Sara,24,18
Taha,25,20
Omid,26,13
Negar,21,17


usecols: 'list[HashableT] | Callable[[Hashable], bool] | None'

In [67]:
path = 'd:Files/read_test.csv'

In [68]:
pd.read_csv(path)

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


In [69]:
pd.read_csv(path, usecols=None)

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


In [70]:
pd.read_csv(path, usecols=[0])

Unnamed: 0,name
0,Ali
1,Sara
2,Taha
3,Omid
4,Negar


In [71]:
pd.read_csv(path, usecols=[0, 2])

Unnamed: 0,name,score
0,Ali,19
1,Sara,18
2,Taha,20
3,Omid,13
4,Negar,17


In [72]:
pd.read_csv(path, usecols=['name'])

Unnamed: 0,name
0,Ali
1,Sara
2,Taha
3,Omid
4,Negar


In [73]:
pd.read_csv(path, usecols=['name', 'score'])

Unnamed: 0,name,score
0,Ali,19
1,Sara,18
2,Taha,20
3,Omid,13
4,Negar,17


In [74]:
pd.read_csv(path, index_col=['name'], usecols=['name', 'score'])

Unnamed: 0_level_0,score
name,Unnamed: 1_level_1
Ali,19
Sara,18
Taha,20
Omid,13
Negar,17


parse_dates: 'bool | Sequence[Hashable] | None'=None

In [75]:
path = "D:Files/Data/btc-usd.csv"

In [76]:
df = pd.read_csv(path, index_col='Date')

In [77]:
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-04-30,53568.664063,57900.71875,53129.601563,57750.175781,57750.175781,52395931985
2021-05-01,57714.664063,58448.339844,57052.273438,57828.050781,57828.050781,42836427360
2021-05-02,57825.863281,57902.59375,56141.90625,56631.078125,56631.078125,38177405335
2021-05-03,56620.273438,58973.308594,56590.871094,57200.292969,57200.292969,51713139031
2021-05-04,57214.179688,57214.179688,53191.425781,53333.539063,53333.539063,68564706967


In [78]:
df.index

Index(['2021-04-30', '2021-05-01', '2021-05-02', '2021-05-03', '2021-05-04',
       '2021-05-05', '2021-05-06', '2021-05-07', '2021-05-08', '2021-05-09',
       ...
       '2022-04-21', '2022-04-22', '2022-04-23', '2022-04-24', '2022-04-25',
       '2022-04-26', '2022-04-27', '2022-04-28', '2022-04-29', '2022-04-30'],
      dtype='object', name='Date', length=366)

In [79]:
type(df.index)    # base.Index

pandas.core.indexes.base.Index

In [80]:
#---------------

In [81]:
df = pd.read_csv(path, index_col='Date', parse_dates=['Date'])
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-04-30,53568.664063,57900.71875,53129.601563,57750.175781,57750.175781,52395931985
2021-05-01,57714.664063,58448.339844,57052.273438,57828.050781,57828.050781,42836427360
2021-05-02,57825.863281,57902.59375,56141.90625,56631.078125,56631.078125,38177405335
2021-05-03,56620.273438,58973.308594,56590.871094,57200.292969,57200.292969,51713139031
2021-05-04,57214.179688,57214.179688,53191.425781,53333.539063,53333.539063,68564706967


In [82]:
df.index

DatetimeIndex(['2021-04-30', '2021-05-01', '2021-05-02', '2021-05-03',
               '2021-05-04', '2021-05-05', '2021-05-06', '2021-05-07',
               '2021-05-08', '2021-05-09',
               ...
               '2022-04-21', '2022-04-22', '2022-04-23', '2022-04-24',
               '2022-04-25', '2022-04-26', '2022-04-27', '2022-04-28',
               '2022-04-29', '2022-04-30'],
              dtype='datetime64[ns]', name='Date', length=366, freq=None)

In [83]:
type(df.index)    # datetimes.DatetimeIndex

pandas.core.indexes.datetimes.DatetimeIndex

In [84]:
#---------------

In [85]:
df = pd.read_csv(path, parse_dates=['Date'])
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-04-30,53568.664063,57900.71875,53129.601563,57750.175781,57750.175781,52395931985
1,2021-05-01,57714.664063,58448.339844,57052.273438,57828.050781,57828.050781,42836427360
2,2021-05-02,57825.863281,57902.59375,56141.90625,56631.078125,56631.078125,38177405335
3,2021-05-03,56620.273438,58973.308594,56590.871094,57200.292969,57200.292969,51713139031
4,2021-05-04,57214.179688,57214.179688,53191.425781,53333.539063,53333.539063,68564706967


In [86]:
df.dtypes

Date         datetime64[ns]
Open                float64
High                float64
Low                 float64
Close               float64
Adj Close           float64
Volume                int64
dtype: object

In [87]:
# squeeze()

In [88]:
pd.read_csv(path, index_col='Date', parse_dates=['Date'], usecols=['Date', 'Close'])

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2021-04-30,57750.175781
2021-05-01,57828.050781
2021-05-02,56631.078125
2021-05-03,57200.292969
2021-05-04,53333.539063
...,...
2022-04-26,38117.460938
2022-04-27,39241.121094
2022-04-28,39773.828125
2022-04-29,38609.824219


In [89]:
pd.read_csv(path, index_col='Date', parse_dates=['Date'], usecols=['Date', 'Close']).squeeze()

Date
2021-04-30    57750.175781
2021-05-01    57828.050781
2021-05-02    56631.078125
2021-05-03    57200.292969
2021-05-04    53333.539063
                  ...     
2022-04-26    38117.460938
2022-04-27    39241.121094
2022-04-28    39773.828125
2022-04-29    38609.824219
2022-04-30    38281.816406
Name: Close, Length: 366, dtype: float64

In [90]:
pd.read_csv(path, index_col='Date', parse_dates=['Date'], usecols=['Date', 'Close'])['Close']

Date
2021-04-30    57750.175781
2021-05-01    57828.050781
2021-05-02    56631.078125
2021-05-03    57200.292969
2021-05-04    53333.539063
                  ...     
2022-04-26    38117.460938
2022-04-27    39241.121094
2022-04-28    39773.828125
2022-04-29    38609.824219
2022-04-30    38281.816406
Name: Close, Length: 366, dtype: float64

skiprows: 'list[int] | int | Callable[[Hashable], bool] | None'

In [91]:
path = 'd:Files/read_test.csv'

In [92]:
pd.read_csv(path)

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


In [93]:
pd.read_csv(path, skiprows=0)        # skip 0 row

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


In [94]:
pd.read_csv(path, skiprows=1)  

Unnamed: 0,Ali,27,19
0,Sara,24,18
1,Taha,25,20
2,Omid,26,13
3,Negar,21,17


In [95]:
pd.read_csv(path, skiprows=2)        # skip 2 rows

Unnamed: 0,Sara,24,18
0,Taha,25,20
1,Omid,26,13
2,Negar,21,17


In [96]:
pd.read_csv(path, skiprows=[2])      # skip row 2

Unnamed: 0,name,age,score
0,Ali,27,19
1,Taha,25,20
2,Omid,26,13
3,Negar,21,17


In [97]:
pd.read_csv(path, skiprows=[1])

Unnamed: 0,name,age,score
0,Sara,24,18
1,Taha,25,20
2,Omid,26,13
3,Negar,21,17


In [98]:
pd.read_csv(path, skiprows=[0])

Unnamed: 0,Ali,27,19
0,Sara,24,18
1,Taha,25,20
2,Omid,26,13
3,Negar,21,17


In [99]:
pd.read_csv(path, skiprows=[1, 4])   # skip row 1 & 4

Unnamed: 0,name,age,score
0,Sara,24,18
1,Taha,25,20
2,Negar,21,17


In [100]:
#---------------

In [101]:
pd.read_csv(path, names=['nam', 'sen', 'nomre'])

Unnamed: 0,nam,sen,nomre
0,name,age,score
1,Ali,27,19
2,Sara,24,18
3,Taha,25,20
4,Omid,26,13
5,Negar,21,17


In [102]:
pd.read_csv(path, names=['nam', 'sen', 'nomre'], skiprows=[0])

Unnamed: 0,nam,sen,nomre
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


In [103]:
pd.read_csv(path, names=['nam', 'sen', 'nomre'], index_col=0, skiprows=[0])

Unnamed: 0_level_0,sen,nomre
nam,Unnamed: 1_level_1,Unnamed: 2_level_1
Ali,27,19
Sara,24,18
Taha,25,20
Omid,26,13
Negar,21,17


In [104]:
#---------------

In [105]:
pd.read_csv('d:Files/age.csv')

Unnamed: 0,# Hello,shirafkan
0,name,age
1,Ali,12
2,Sara,13
3,Taha,12
4,Mahsa,20


In [106]:
pd.read_csv('d:Files/age.csv', header=[1])

Unnamed: 0,name,age
0,Ali,12
1,Sara,13
2,Taha,12
3,Mahsa,20


In [107]:
pd.read_csv('d:Files/age.csv', header=[0, 1])

Unnamed: 0_level_0,# Hello,shirafkan
Unnamed: 0_level_1,name,age
0,Ali,12
1,Sara,13
2,Taha,12
3,Mahsa,20


In [108]:
pd.read_csv('d:Files/age.csv', skiprows=[0])

Unnamed: 0,name,age
0,Ali,12
1,Sara,13
2,Taha,12
3,Mahsa,20


In [109]:
pd.read_csv('d:Files/age.csv', skiprows=[0, 1])

Unnamed: 0,Ali,12
0,Sara,13
1,Taha,12
2,Mahsa,20


In [110]:
pd.read_csv('d:Files/age.csv', skiprows=[0, 1], header=None)

Unnamed: 0,0,1
0,Ali,12
1,Sara,13
2,Taha,12
3,Mahsa,20


In [111]:
pd.read_csv('d:Files/age.csv', names=['nam', 'sen'], skiprows=[0, 1])

Unnamed: 0,nam,sen
0,Ali,12
1,Sara,13
2,Taha,12
3,Mahsa,20


skipfooter: 'int'

In [112]:
path = 'd:Files/read_test.csv'

In [113]:
pd.read_csv(path)

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


In [114]:
pd.read_csv(path, skipfooter=1, engine='python')

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13


In [115]:
pd.read_csv(path, skipfooter=2, engine='python')

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20


In [116]:
pd.read_csv(path, skipfooter=4, engine='python')

Unnamed: 0,name,age,score
0,Ali,27,19


nrows: 'int | None'

In [117]:
path = 'd:Files/read_test.csv'

In [118]:
pd.read_csv(path)

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


In [119]:
pd.read_csv(path, nrows=0)

Unnamed: 0,name,age,score


In [120]:
pd.read_csv(path, nrows=2)

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18


In [121]:
pd.read_csv(path, nrows=4)

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13


chunksize: 'int | None'

In [122]:
path = 'd:Files/read_test.csv'

In [123]:
pd.read_csv(path)

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13
4,Negar,21,17


In [124]:
pd.read_csv(path, chunksize=1)

<pandas.io.parsers.readers.TextFileReader at 0x182da393310>

In [125]:
list(pd.read_csv(path, chunksize=1))

[  name  age  score
 0  Ali   27     19,
    name  age  score
 1  Sara   24     18,
    name  age  score
 2  Taha   25     20,
    name  age  score
 3  Omid   26     13,
     name  age  score
 4  Negar   21     17]

In [126]:
list(pd.read_csv(path, chunksize=2))

[   name  age  score
 0   Ali   27     19
 1  Sara   24     18,
    name  age  score
 2  Taha   25     20
 3  Omid   26     13,
     name  age  score
 4  Negar   21     17]

In [127]:
list(pd.read_csv(path, chunksize=3))

[   name  age  score
 0   Ali   27     19
 1  Sara   24     18
 2  Taha   25     20,
     name  age  score
 3   Omid   26     13
 4  Negar   21     17]

**select rows**

In [128]:
path = 'd:Files/Alphabet.csv'

In [129]:
pd.read_csv(path)

Unnamed: 0,col1,col2
0,A,1
1,B,2
2,C,3
3,D,4
4,E,5
5,F,6
6,G,7
7,H,8
8,I,9
9,J,10


In [130]:
pd.read_csv(path, nrows=6)

Unnamed: 0,col1,col2
0,A,1
1,B,2
2,C,3
3,D,4
4,E,5
5,F,6


In [131]:
pd.read_csv(path, skiprows=20)

Unnamed: 0,T,20
0,U,21
1,V,22
2,W,23
3,X,24
4,Y,25
5,Z,26


In [132]:
pd.read_csv(path, skiprows=[5, 20])

Unnamed: 0,col1,col2
0,A,1
1,B,2
2,C,3
3,D,4
4,F,6
5,G,7
6,H,8
7,I,9
8,J,10
9,K,11


In [133]:
pd.read_csv(path, skipfooter=20, engine='python')

Unnamed: 0,col1,col2
0,A,1
1,B,2
2,C,3
3,D,4
4,E,5
5,F,6


In [134]:
pd.options.display.max_rows = 6
pd.read_csv(path)

Unnamed: 0,col1,col2
0,A,1
1,B,2
2,C,3
...,...,...
23,X,24
24,Y,25
25,Z,26


In [135]:
c = pd.read_csv(path, chunksize=8)
list(c)

[   col1  col2
 0     A     1
 1     B     2
 2     C     3
 ..  ...   ...
 5     F     6
 6     G     7
 7     H     8
 
 [8 rows x 2 columns],
    col1  col2
 8     I     9
 9     J    10
 10    K    11
 ..  ...   ...
 13    N    14
 14    O    15
 15    P    16
 
 [8 rows x 2 columns],
    col1  col2
 16    Q    17
 17    R    18
 18    S    19
 ..  ...   ...
 21    V    22
 22    W    23
 23    X    24
 
 [8 rows x 2 columns],
    col1  col2
 24    Y    25
 25    Z    26]

### csv.reader

In [136]:
import csv

In [137]:
with open('d:Files/score.csv') as f:
    x = pd.read_csv(f)
x   

Unnamed: 0,name,age,Score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13


In [138]:
with open('d:Files/score.csv') as f:
    x = list(csv.reader(f))
x    

[['name', 'age', 'Score'],
 ['Ali', '27', '19'],
 ['Sara', '24', '18'],
 ['Taha', '25', '20'],
 ['Omid', '26', '13']]

In [139]:
df = pd.DataFrame(x); df

Unnamed: 0,0,1,2
0,name,age,Score
1,Ali,27,19
2,Sara,24,18
3,Taha,25,20
4,Omid,26,13


In [140]:
df = df.set_axis(df.iloc[0], axis=1)
df.drop(index=0, inplace=True); df

Unnamed: 0,name,age,Score
1,Ali,27,19
2,Sara,24,18
3,Taha,25,20
4,Omid,26,13


### sys.stdout

In [141]:
import sys

In [142]:
mydata = {'name' : ['Ali', 'Sara', 'Taha', 'Omid'], 
          'age'  : [27, 24, 25, 26], 
          'score': [19, 18, 20, 13]}

In [143]:
mydf = pd.DataFrame(mydata); mydf

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13


In [144]:
# sys.stdout be jaye path. khoroojie estandard

In [145]:
mydf.to_csv(sys.stdout)    

,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13


In [146]:
mydf.to_csv(sys.stdout, index=False, header=False, sep='|')

Ali|27|19
Sara|24|18
Taha|25|20
Omid|26|13


## Other file types

### Excel 

In [147]:
mydata = {'name' : ['Ali', 'Sara', 'Taha', 'Omid'], 
          'age'  : [27, 24, 25, 26], 
          'Score': [19, 18, 20, 13]}

In [148]:
mydf = pd.DataFrame(mydata); mydf

Unnamed: 0,name,age,Score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13


In [149]:
mydf.to_excel('d:Files/score.xlsx', index=False)

In [150]:
pd.read_excel('d:Files/score.xlsx')

Unnamed: 0,name,age,Score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13


### PKL 

In [151]:
d = {'name' : ['Ali', 'Sara', 'Taha', 'Omid'], 
     'age'  : [27, 24, 25, 26], 
     'score': [19, 18, 20, 13]}

In [152]:
df = pd.DataFrame(d); df

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13


In [153]:
df.to_pickle('d:Files/p_test.pkl')

In [154]:
pd.read_pickle('d:Files/p_test.pkl')

Unnamed: 0,name,age,score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13


### JSON 

JSON   : JavaScript Object Notation

In [155]:
import json 

In [156]:
mystr = """{  
             "FirstName": "Taha",
             "Courses"  : [{"Name": "Python", "Score": 18}, {"Name": "C++", "Score": 17 }]
        }"""

In [157]:
mydict = json.loads(mystr); mydict

{'FirstName': 'Taha',
 'Courses': [{'Name': 'Python', 'Score': 18}, {'Name': 'C++', 'Score': 17}]}

In [158]:
pd.DataFrame(mydict)

Unnamed: 0,FirstName,Courses
0,Taha,"{'Name': 'Python', 'Score': 18}"
1,Taha,"{'Name': 'C++', 'Score': 17}"


In [159]:
pd.DataFrame(mydict['Courses'])

Unnamed: 0,Name,Score
0,Python,18
1,C++,17


In [160]:
mydf = pd.DataFrame(mydict['Courses'])

In [161]:
mydf.to_json()

'{"Name":{"0":"Python","1":"C++"},"Score":{"0":18,"1":17}}'

In [162]:
path = 'd:Files/j_test.json'

In [163]:
mydf.to_json(path)
pd.read_json(path)

Unnamed: 0,Name,Score
0,Python,18
1,C++,17


In [164]:
mydf.to_json(orient='index')

'{"0":{"Name":"Python","Score":18},"1":{"Name":"C++","Score":17}}'

In [165]:
mydf.to_json(path, orient='index')
pd.read_json(path)

Unnamed: 0,0,1
Name,Python,C++
Score,18,17


In [166]:
mydf.to_json(orient='values')

'[["Python",18],["C++",17]]'

In [167]:
mydf.to_json(path, orient='values')
pd.read_json(path)

Unnamed: 0,0,1
0,Python,18
1,C++,17


In [168]:
mydf.to_json(orient='split')

'{"columns":["Name","Score"],"index":[0,1],"data":[["Python",18],["C++",17]]}'

In [169]:
mydf.to_json(path, orient='split')
pd.read_json(path)

Unnamed: 0,columns,index,data
0,Name,0,"[Python, 18]"
1,Score,1,"[C++, 17]"


### HTML 

HTML : Hypertext Markup Language

In [170]:
mystr = """
<table>
  <thead>
    <tr>
      <th>name</th>
      <th>score</th>
    </tr>
  </thead> 
  <tbody>
    <tr>
      <td>Ali</td>
      <td>12</td>
    </tr>
    <tr>
      <td>Sara</td>
      <td>18</td>
    </tr>    
   </tbody>   
</table>
"""

In [171]:
lst = pd.read_html(mystr)

  lst = pd.read_html(mystr)


In [172]:
lst[0]

Unnamed: 0,name,score
0,Ali,12
1,Sara,18


In [173]:
r = pd.read_html('d:Files/h_test.html')
r[0]

Unnamed: 0,name,score
0,Ali,12
1,Sara,18


In [174]:
r[0].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   name    2 non-null      object
 1   score   2 non-null      int64 
dtypes: int64(1), object(1)
memory usage: 164.0+ bytes


### XML

XML : Extensible Markup Language

In [175]:
from lxml import objectify  

In [176]:
f = open('d:Files/book.xml')
r = objectify.parse(f).getroot()
data = []

for i in r.C:
    x = {}
    for c in i.getchildren():
        x[c.tag] = c.pyval
    data.append(x)

In [177]:
pd.DataFrame(data)

Unnamed: 0,author,title,price
0,Shirafkan,C++,60
1,Sara,Java,50


In [178]:
#---------------

In [179]:
from lxml import etree as et

In [180]:
s = '''
<Book>
<C>
  <author>Shirafkan</author>
  <title>C++</title>
  <price>60</price>
</C>
<C>
  <author>Sara</author>
  <title>Java</title>
  <price>50</price>
</C>
</Book>
'''

In [181]:
root = et.XML(s)

In [182]:
print(len(root))
print(list(root))
print(root.find("C"))

2
[<Element C at 0x182dab66400>, <Element C at 0x182dab66040>]
<Element C at 0x182dab66040>


In [183]:
print(et.tostring(root))

b'<Book>\n<C>\n  <author>Shirafkan</author>\n  <title>C++</title>\n  <price>60</price>\n</C>\n<C>\n  <author>Sara</author>\n  <title>Java</title>\n  <price>50</price>\n</C>\n</Book>'


In [184]:
et.tostring(root, xml_declaration=True).decode('utf-8')

"<?xml version='1.0' encoding='ASCII'?>\n<Book>\n<C>\n  <author>Shirafkan</author>\n  <title>C++</title>\n  <price>60</price>\n</C>\n<C>\n  <author>Sara</author>\n  <title>Java</title>\n  <price>50</price>\n</C>\n</Book>"

### HDF

HDF : Hierarchical Data Format

In [185]:
import tables

In [186]:
mydf = pd.DataFrame({'A': [18, 22], 'B': [3, 15]}); mydf

Unnamed: 0,A,B
0,18,3
1,22,15


In [187]:
path = 'd:\Files\data.h5'

In [188]:
mydf.to_hdf(path, key='df', mode='w')

In [189]:
pd.read_hdf(path, 'df')

Unnamed: 0,A,B
0,18,3
1,22,15


## miscellaneous

### read_table

In [190]:
pd.read_csv('d:Files/score.csv')

Unnamed: 0,name,age,Score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13


In [191]:
pd.read_table('d:Files/score.csv')

Unnamed: 0,"name,age,Score"
0,"Ali,27,19"
1,"Sara,24,18"
2,"Taha,25,20"
3,"Omid,26,13"


In [192]:
pd.read_table('d:Files/score.csv', sep=',')

Unnamed: 0,name,age,Score
0,Ali,27,19
1,Sara,24,18
2,Taha,25,20
3,Omid,26,13


In [193]:
pd.read_table('d:Files/mytext.txt')

Unnamed: 0,A B C
0,row1 1 2 3
1,row2 4 5 6


In [194]:
pd.read_table('d:Files/mytext.txt', sep='\s+')

Unnamed: 0,A,B,C
row1,1,2,3
row2,4,5,6


### sqlite3

In [195]:
# Interacting with Databases

In [196]:
import sqlite3

In [197]:
query = """ CREATE TABLE AgeTable (Name VARCHAR(20), Age INTEGER); """

In [198]:
con = sqlite3.connect('d:Files/mydata.sqlite')
con.execute(query)
con.commit()

In [199]:
data = [('Ali', 37), ('Taha', 25), ('Sara', 40)]
q = "INSERT INTO AgeTable VALUES(?, ?)"
con.executemany(q, data)
con.commit()

In [200]:
q = 'select * from AgeTable'
mydata = con.execute(q)
col = [x[0] for x in mydata.description]

In [201]:
pd.DataFrame(mydata.fetchall(), columns=col)

Unnamed: 0,Name,Age
0,Ali,37
1,Taha,25
2,Sara,40


### requests

In [202]:
import requests

In [203]:
r = requests.get('https://api.github.com/repos/pandas-dev/pandas/issues')

In [204]:
if r.status_code == 200:
    print('Success!')
elif r.status_code == 404:
    print('Not Found.')

Success!


In [205]:
df = pd.DataFrame(r.json(), columns=['number', 'title'])
df.head()

Unnamed: 0,number,title
0,59392,ENH: pandas-dev#59237
1,59391,ENH: Implement option 'truncate' of argument `...
2,59390,ENH: Add option for DataFrame.compare and Seri...
3,59389,ENH: support for non-nanosecond precision in p...
4,59388,String dtype: use 'str' string alias and repre...
