# Pandas Lib

> Source = https://pandas.pydata.org/docs/user_guide/10min.html

In [1]:
import pandas as pd
import numpy as np

In [2]:
series1 = pd.Series([1, 2, 3, 'Malihe', 0, np.nan, -1])
series1

0         1
1         2
2         3
3    Malihe
4         0
5       NaN
6        -1
dtype: object

In [3]:
type(series1[3])

str

In [4]:
series1[len(series1) - 1]

-1

In [5]:
type(series1[len(series1) - 1])

int

In [6]:
type(series1)

pandas.core.series.Series

In [7]:
dates = pd.date_range("20241015", periods = 7)
dates

DatetimeIndex(['2024-10-15', '2024-10-16', '2024-10-17', '2024-10-18',
               '2024-10-19', '2024-10-20', '2024-10-21'],
              dtype='datetime64[ns]', freq='D')

In [8]:
print([d for d in dates])

[Timestamp('2024-10-15 00:00:00'), Timestamp('2024-10-16 00:00:00'), Timestamp('2024-10-17 00:00:00'), Timestamp('2024-10-18 00:00:00'), Timestamp('2024-10-19 00:00:00'), Timestamp('2024-10-20 00:00:00'), Timestamp('2024-10-21 00:00:00')]


In [9]:
print('\n'.join(map(str, dates)))
# map(function, iterable): map takes a function and an iterable (like a list), and applies the function to each element in the iterable.

2024-10-15 00:00:00
2024-10-16 00:00:00
2024-10-17 00:00:00
2024-10-18 00:00:00
2024-10-19 00:00:00
2024-10-20 00:00:00
2024-10-21 00:00:00


In [10]:
for d in dates:
    print(d)

2024-10-15 00:00:00
2024-10-16 00:00:00
2024-10-17 00:00:00
2024-10-18 00:00:00
2024-10-19 00:00:00
2024-10-20 00:00:00
2024-10-21 00:00:00


## DataFrame

In [11]:
df = pd.DataFrame(np.random.randn(7, 4), index = dates, columns = ['A', 'B', 'C', 'D'])
# np.random.randn: This function generates random numbers from a standard normal distribution (Gaussian distribution with mean 0 and variance 1).

In [12]:
df

Unnamed: 0,A,B,C,D
2024-10-15,-1.281793,0.358641,-1.266926,0.985365
2024-10-16,-0.508918,0.134144,0.472175,1.623367
2024-10-17,0.59004,-0.061607,0.125571,-2.003038
2024-10-18,-2.012784,0.01158,1.82253,-0.386665
2024-10-19,1.754172,-0.375431,-2.147462,1.230035
2024-10-20,-0.295957,-1.286849,-1.289448,-0.931113
2024-10-21,-0.476906,-1.512358,1.300378,-0.052495


In [13]:
df.mean()

A   -0.318878
B   -0.390269
C   -0.140454
D    0.066494
dtype: float64

In [14]:
df.mean().mean()

-0.19577684449044672

In [15]:
type(df)

pandas.core.frame.DataFrame

In [16]:
dates[1]

Timestamp('2024-10-16 00:00:00')

In [17]:
df.dtypes

A    float64
B    float64
C    float64
D    float64
dtype: object

In [18]:
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 7 entries, 2024-10-15 to 2024-10-21
Freq: D
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   A       7 non-null      float64
 1   B       7 non-null      float64
 2   C       7 non-null      float64
 3   D       7 non-null      float64
dtypes: float64(4)
memory usage: 280.0 bytes


In [19]:
 df['A']

2024-10-15   -1.281793
2024-10-16   -0.508918
2024-10-17    0.590040
2024-10-18   -2.012784
2024-10-19    1.754172
2024-10-20   -0.295957
2024-10-21   -0.476906
Freq: D, Name: A, dtype: float64

In [20]:
df

Unnamed: 0,A,B,C,D
2024-10-15,-1.281793,0.358641,-1.266926,0.985365
2024-10-16,-0.508918,0.134144,0.472175,1.623367
2024-10-17,0.59004,-0.061607,0.125571,-2.003038
2024-10-18,-2.012784,0.01158,1.82253,-0.386665
2024-10-19,1.754172,-0.375431,-2.147462,1.230035
2024-10-20,-0.295957,-1.286849,-1.289448,-0.931113
2024-10-21,-0.476906,-1.512358,1.300378,-0.052495


In [21]:
df.head()

Unnamed: 0,A,B,C,D
2024-10-15,-1.281793,0.358641,-1.266926,0.985365
2024-10-16,-0.508918,0.134144,0.472175,1.623367
2024-10-17,0.59004,-0.061607,0.125571,-2.003038
2024-10-18,-2.012784,0.01158,1.82253,-0.386665
2024-10-19,1.754172,-0.375431,-2.147462,1.230035


In [22]:
df.head(3)

Unnamed: 0,A,B,C,D
2024-10-15,-1.281793,0.358641,-1.266926,0.985365
2024-10-16,-0.508918,0.134144,0.472175,1.623367
2024-10-17,0.59004,-0.061607,0.125571,-2.003038


In [23]:
df.tail()

Unnamed: 0,A,B,C,D
2024-10-17,0.59004,-0.061607,0.125571,-2.003038
2024-10-18,-2.012784,0.01158,1.82253,-0.386665
2024-10-19,1.754172,-0.375431,-2.147462,1.230035
2024-10-20,-0.295957,-1.286849,-1.289448,-0.931113
2024-10-21,-0.476906,-1.512358,1.300378,-0.052495


In [24]:
df.tail(3)

Unnamed: 0,A,B,C,D
2024-10-19,1.754172,-0.375431,-2.147462,1.230035
2024-10-20,-0.295957,-1.286849,-1.289448,-0.931113
2024-10-21,-0.476906,-1.512358,1.300378,-0.052495


In [25]:
df.describe()

Unnamed: 0,A,B,C,D
count,7.0,7.0,7.0,7.0
mean,-0.318878,-0.390269,-0.140454,0.066494
std,1.223506,0.726698,1.471541,1.298698
min,-2.012784,-1.512358,-2.147462,-2.003038
25%,-0.895356,-0.83114,-1.278187,-0.658889
50%,-0.476906,-0.061607,0.125571,-0.052495
75%,0.147041,0.072862,0.886277,1.1077
max,1.754172,0.358641,1.82253,1.623367


In [26]:
df.mean()

A   -0.318878
B   -0.390269
C   -0.140454
D    0.066494
dtype: float64

In [27]:
df['A'].mean()

-0.31887809130496997

In [28]:
df['D'].max()

1.6233673653031149

In [29]:
df[1 : 3]

Unnamed: 0,A,B,C,D
2024-10-16,-0.508918,0.134144,0.472175,1.623367
2024-10-17,0.59004,-0.061607,0.125571,-2.003038


In [30]:
df[1 : 3]['B']

2024-10-16    0.134144
2024-10-17   -0.061607
Freq: D, Name: B, dtype: float64

In [31]:
df_t = df.T

In [32]:
df_t

Unnamed: 0,2024-10-15,2024-10-16,2024-10-17,2024-10-18,2024-10-19,2024-10-20,2024-10-21
A,-1.281793,-0.508918,0.59004,-2.012784,1.754172,-0.295957,-0.476906
B,0.358641,0.134144,-0.061607,0.01158,-0.375431,-1.286849,-1.512358
C,-1.266926,0.472175,0.125571,1.82253,-2.147462,-1.289448,1.300378
D,0.985365,1.623367,-2.003038,-0.386665,1.230035,-0.931113,-0.052495


In [33]:
df.sort_values(by = "B")

Unnamed: 0,A,B,C,D
2024-10-21,-0.476906,-1.512358,1.300378,-0.052495
2024-10-20,-0.295957,-1.286849,-1.289448,-0.931113
2024-10-19,1.754172,-0.375431,-2.147462,1.230035
2024-10-17,0.59004,-0.061607,0.125571,-2.003038
2024-10-18,-2.012784,0.01158,1.82253,-0.386665
2024-10-16,-0.508918,0.134144,0.472175,1.623367
2024-10-15,-1.281793,0.358641,-1.266926,0.985365


In [34]:
df[df['A'] > 0]

Unnamed: 0,A,B,C,D
2024-10-17,0.59004,-0.061607,0.125571,-2.003038
2024-10-19,1.754172,-0.375431,-2.147462,1.230035


In [35]:
df2 = pd.DataFrame(
    {
        "A" : 1.0,
        "B" : pd.Timestamp("20241016"),
        "C" : pd.Series(1, index = list(range(4)), dtype = "float32"),
        "D" : np.array([3] * 4, dtype = "int32"),
        "E" : pd.Categorical(["test", "train", "test", "train"]),
        "F" : "foo",
    }
)

df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2024-10-16,1.0,3,test,foo
1,1.0,2024-10-16,1.0,3,train,foo
2,1.0,2024-10-16,1.0,3,test,foo
3,1.0,2024-10-16,1.0,3,train,foo


In [36]:
arr = np.array([3] * 10, dtype = "int32")
arr

array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3])

In [37]:
df2.C.index

Index([0, 1, 2, 3], dtype='int64')

In [38]:
df2.dtypes

A          float64
B    datetime64[s]
C          float32
D            int32
E         category
F           object
dtype: object

In [39]:
df2.index

Index([0, 1, 2, 3], dtype='int64')

In [40]:
df2.to_numpy()

array([[1.0, Timestamp('2024-10-16 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2024-10-16 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2024-10-16 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2024-10-16 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

In [41]:
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2024-10-16,1.0,3,test,foo
1,1.0,2024-10-16,1.0,3,train,foo
2,1.0,2024-10-16,1.0,3,test,foo
3,1.0,2024-10-16,1.0,3,train,foo


In [42]:
df2['A']

0    1.0
1    1.0
2    1.0
3    1.0
Name: A, dtype: float64

In [43]:
df2.A

0    1.0
1    1.0
2    1.0
3    1.0
Name: A, dtype: float64

In [44]:
df2[1 : 3]

Unnamed: 0,A,B,C,D,E,F
1,1.0,2024-10-16,1.0,3,train,foo
2,1.0,2024-10-16,1.0,3,test,foo


In [45]:
df

Unnamed: 0,A,B,C,D
2024-10-15,-1.281793,0.358641,-1.266926,0.985365
2024-10-16,-0.508918,0.134144,0.472175,1.623367
2024-10-17,0.59004,-0.061607,0.125571,-2.003038
2024-10-18,-2.012784,0.01158,1.82253,-0.386665
2024-10-19,1.754172,-0.375431,-2.147462,1.230035
2024-10-20,-0.295957,-1.286849,-1.289448,-0.931113
2024-10-21,-0.476906,-1.512358,1.300378,-0.052495


In [46]:
df["20241018" : "20241020"]

Unnamed: 0,A,B,C,D
2024-10-18,-2.012784,0.01158,1.82253,-0.386665
2024-10-19,1.754172,-0.375431,-2.147462,1.230035
2024-10-20,-0.295957,-1.286849,-1.289448,-0.931113


In [47]:
df.loc[dates[0]]

A   -1.281793
B    0.358641
C   -1.266926
D    0.985365
Name: 2024-10-15 00:00:00, dtype: float64

In [48]:
df.loc['2024-10-18']

A   -2.012784
B    0.011580
C    1.822530
D   -0.386665
Name: 2024-10-18 00:00:00, dtype: float64

In [49]:
df.loc[:, ["B", "D"]]

Unnamed: 0,B,D
2024-10-15,0.358641,0.985365
2024-10-16,0.134144,1.623367
2024-10-17,-0.061607,-2.003038
2024-10-18,0.01158,-0.386665
2024-10-19,-0.375431,1.230035
2024-10-20,-1.286849,-0.931113
2024-10-21,-1.512358,-0.052495


In [50]:
df.loc["2024-10-20" : "2024-10-24", ["A", "B"]]

Unnamed: 0,A,B
2024-10-20,-0.295957,-1.286849
2024-10-21,-0.476906,-1.512358


In [51]:
df.loc[dates[0], "A"]

-1.2817934657649879

In [52]:
df.at[dates[0], "A"]

-1.2817934657649879

In [53]:
df

Unnamed: 0,A,B,C,D
2024-10-15,-1.281793,0.358641,-1.266926,0.985365
2024-10-16,-0.508918,0.134144,0.472175,1.623367
2024-10-17,0.59004,-0.061607,0.125571,-2.003038
2024-10-18,-2.012784,0.01158,1.82253,-0.386665
2024-10-19,1.754172,-0.375431,-2.147462,1.230035
2024-10-20,-0.295957,-1.286849,-1.289448,-0.931113
2024-10-21,-0.476906,-1.512358,1.300378,-0.052495


In [54]:
df.iloc[3]

A   -2.012784
B    0.011580
C    1.822530
D   -0.386665
Name: 2024-10-18 00:00:00, dtype: float64

In [55]:
df.iloc[3].dtypes

dtype('float64')

In [56]:
type(df.iloc[3])

pandas.core.series.Series

In [57]:
df.iloc[3 : 5, 0 : 2]

Unnamed: 0,A,B
2024-10-18,-2.012784,0.01158
2024-10-19,1.754172,-0.375431


In [58]:
df.iloc[[1, 2, 4], [0, 2]]

Unnamed: 0,A,C
2024-10-16,-0.508918,0.472175
2024-10-17,0.59004,0.125571
2024-10-19,1.754172,-2.147462


In [59]:
df.iloc[1 : 3, :]

Unnamed: 0,A,B,C,D
2024-10-16,-0.508918,0.134144,0.472175,1.623367
2024-10-17,0.59004,-0.061607,0.125571,-2.003038


In [60]:
df.iloc[:, 1 : 3]

Unnamed: 0,B,C
2024-10-15,0.358641,-1.266926
2024-10-16,0.134144,0.472175
2024-10-17,-0.061607,0.125571
2024-10-18,0.01158,1.82253
2024-10-19,-0.375431,-2.147462
2024-10-20,-1.286849,-1.289448
2024-10-21,-1.512358,1.300378


In [61]:
df.iloc[1, 1]

0.1341436352437352

In [62]:
df

Unnamed: 0,A,B,C,D
2024-10-15,-1.281793,0.358641,-1.266926,0.985365
2024-10-16,-0.508918,0.134144,0.472175,1.623367
2024-10-17,0.59004,-0.061607,0.125571,-2.003038
2024-10-18,-2.012784,0.01158,1.82253,-0.386665
2024-10-19,1.754172,-0.375431,-2.147462,1.230035
2024-10-20,-0.295957,-1.286849,-1.289448,-0.931113
2024-10-21,-0.476906,-1.512358,1.300378,-0.052495


In [63]:
df.iat[1, 1]

0.1341436352437352

In [64]:
df[df["A"] > 0]

Unnamed: 0,A,B,C,D
2024-10-17,0.59004,-0.061607,0.125571,-2.003038
2024-10-19,1.754172,-0.375431,-2.147462,1.230035


In [65]:
df[df > 0]

Unnamed: 0,A,B,C,D
2024-10-15,,0.358641,,0.985365
2024-10-16,,0.134144,0.472175,1.623367
2024-10-17,0.59004,,0.125571,
2024-10-18,,0.01158,1.82253,
2024-10-19,1.754172,,,1.230035
2024-10-20,,,,
2024-10-21,,,1.300378,


<div style="text-align: center;">

### Comparison of `NaN` and `None` in Python

</div>

| Feature                   | `NaN` (from `float('nan')` or `numpy.nan`)       | `None` (Null)                              |
|---------------------------|--------------------------------------------------|--------------------------------------------|
| **Meaning**               | Represents an invalid or unknown numeric value   | Represents the absence of a value or data  |
| **Data Type**             | `float`                                          | `NoneType`                                 |
| **Main Use Case**         | For invalid or unknown numeric data              | For null or missing data                   |
| **Comparability**         | `NaN != NaN`, i.e., `NaN` is not equal to itself | `None == None`, i.e., `None` is equal to itself |
| **Usage in Pandas**       | Used in DataFrame and Series to indicate invalid data | Typically used to represent null values in larger data |
| **Participation in Calculations** | Can lead to invalid calculations or results | Does not participate in numeric calculations and raises an error |
| **Boolean Conversion**    | Evaluates as `True` in boolean expressions       | Evaluates as `False` in boolean expressions |


In [66]:
df2 = df.copy()

In [67]:
df2["E"] = ["one", "two", "three", "four", "five", "six", "seven"]

In [68]:
df2

Unnamed: 0,A,B,C,D,E
2024-10-15,-1.281793,0.358641,-1.266926,0.985365,one
2024-10-16,-0.508918,0.134144,0.472175,1.623367,two
2024-10-17,0.59004,-0.061607,0.125571,-2.003038,three
2024-10-18,-2.012784,0.01158,1.82253,-0.386665,four
2024-10-19,1.754172,-0.375431,-2.147462,1.230035,five
2024-10-20,-0.295957,-1.286849,-1.289448,-0.931113,six
2024-10-21,-0.476906,-1.512358,1.300378,-0.052495,seven


In [69]:
df2[df2["E"].isin(["two", "five"])]

Unnamed: 0,A,B,C,D,E
2024-10-16,-0.508918,0.134144,0.472175,1.623367,two
2024-10-19,1.754172,-0.375431,-2.147462,1.230035,five


In [70]:
df

Unnamed: 0,A,B,C,D
2024-10-15,-1.281793,0.358641,-1.266926,0.985365
2024-10-16,-0.508918,0.134144,0.472175,1.623367
2024-10-17,0.59004,-0.061607,0.125571,-2.003038
2024-10-18,-2.012784,0.01158,1.82253,-0.386665
2024-10-19,1.754172,-0.375431,-2.147462,1.230035
2024-10-20,-0.295957,-1.286849,-1.289448,-0.931113
2024-10-21,-0.476906,-1.512358,1.300378,-0.052495


In [71]:
len(df)

7

In [72]:
s1 = pd.Series([1, 2, 3, 4, 5, 6], index = pd.date_range("20240101", periods = 6))

In [73]:
s1

2024-01-01    1
2024-01-02    2
2024-01-03    3
2024-01-04    4
2024-01-05    5
2024-01-06    6
Freq: D, dtype: int64

In [74]:
df.at[dates[0], "A"] = 0

In [75]:
df

Unnamed: 0,A,B,C,D
2024-10-15,0.0,0.358641,-1.266926,0.985365
2024-10-16,-0.508918,0.134144,0.472175,1.623367
2024-10-17,0.59004,-0.061607,0.125571,-2.003038
2024-10-18,-2.012784,0.01158,1.82253,-0.386665
2024-10-19,1.754172,-0.375431,-2.147462,1.230035
2024-10-20,-0.295957,-1.286849,-1.289448,-0.931113
2024-10-21,-0.476906,-1.512358,1.300378,-0.052495


In [76]:
df.iat[0, 1] = 0

In [77]:
df

Unnamed: 0,A,B,C,D
2024-10-15,0.0,0.0,-1.266926,0.985365
2024-10-16,-0.508918,0.134144,0.472175,1.623367
2024-10-17,0.59004,-0.061607,0.125571,-2.003038
2024-10-18,-2.012784,0.01158,1.82253,-0.386665
2024-10-19,1.754172,-0.375431,-2.147462,1.230035
2024-10-20,-0.295957,-1.286849,-1.289448,-0.931113
2024-10-21,-0.476906,-1.512358,1.300378,-0.052495


In [78]:
df.loc[:, "D"] = np.array([5] * len(df))

In [79]:
len(df)

7

In [80]:
df

Unnamed: 0,A,B,C,D
2024-10-15,0.0,0.0,-1.266926,5.0
2024-10-16,-0.508918,0.134144,0.472175,5.0
2024-10-17,0.59004,-0.061607,0.125571,5.0
2024-10-18,-2.012784,0.01158,1.82253,5.0
2024-10-19,1.754172,-0.375431,-2.147462,5.0
2024-10-20,-0.295957,-1.286849,-1.289448,5.0
2024-10-21,-0.476906,-1.512358,1.300378,5.0


In [81]:
arr = np.array([3] * 5)
arr

array([3, 3, 3, 3, 3])

In [82]:
arr.dtype

dtype('int32')

In [83]:
type(arr)

numpy.ndarray

In [84]:
len(arr)

5

In [85]:
df2 = df.copy()
df2[df2 > 0] = - df2
df, df2

(                   A         B         C    D
 2024-10-15  0.000000  0.000000 -1.266926  5.0
 2024-10-16 -0.508918  0.134144  0.472175  5.0
 2024-10-17  0.590040 -0.061607  0.125571  5.0
 2024-10-18 -2.012784  0.011580  1.822530  5.0
 2024-10-19  1.754172 -0.375431 -2.147462  5.0
 2024-10-20 -0.295957 -1.286849 -1.289448  5.0
 2024-10-21 -0.476906 -1.512358  1.300378  5.0,
                    A         B         C    D
 2024-10-15  0.000000  0.000000 -1.266926 -5.0
 2024-10-16 -0.508918 -0.134144 -0.472175 -5.0
 2024-10-17 -0.590040 -0.061607 -0.125571 -5.0
 2024-10-18 -2.012784 -0.011580 -1.822530 -5.0
 2024-10-19 -1.754172 -0.375431 -2.147462 -5.0
 2024-10-20 -0.295957 -1.286849 -1.289448 -5.0
 2024-10-21 -0.476906 -1.512358 -1.300378 -5.0)

In [86]:
df2[df2 > 0]

Unnamed: 0,A,B,C,D
2024-10-15,,,,
2024-10-16,,,,
2024-10-17,,,,
2024-10-18,,,,
2024-10-19,,,,
2024-10-20,,,,
2024-10-21,,,,


In [87]:
df2[df2 == 0]

Unnamed: 0,A,B,C,D
2024-10-15,0.0,0.0,,
2024-10-16,,,,
2024-10-17,,,,
2024-10-18,,,,
2024-10-19,,,,
2024-10-20,,,,
2024-10-21,,,,


In [88]:
df

Unnamed: 0,A,B,C,D
2024-10-15,0.0,0.0,-1.266926,5.0
2024-10-16,-0.508918,0.134144,0.472175,5.0
2024-10-17,0.59004,-0.061607,0.125571,5.0
2024-10-18,-2.012784,0.01158,1.82253,5.0
2024-10-19,1.754172,-0.375431,-2.147462,5.0
2024-10-20,-0.295957,-1.286849,-1.289448,5.0
2024-10-21,-0.476906,-1.512358,1.300378,5.0


In [89]:
df1 = df.reindex(index = dates[0 : 4], columns = list(df.columns) + ["E", "F"])

In [90]:
df1

Unnamed: 0,A,B,C,D,E,F
2024-10-15,0.0,0.0,-1.266926,5.0,,
2024-10-16,-0.508918,0.134144,0.472175,5.0,,
2024-10-17,0.59004,-0.061607,0.125571,5.0,,
2024-10-18,-2.012784,0.01158,1.82253,5.0,,


In [91]:
df1.loc[dates[0] : dates[1], "E"] = 1

In [92]:
df1

Unnamed: 0,A,B,C,D,E,F
2024-10-15,0.0,0.0,-1.266926,5.0,1.0,
2024-10-16,-0.508918,0.134144,0.472175,5.0,1.0,
2024-10-17,0.59004,-0.061607,0.125571,5.0,,
2024-10-18,-2.012784,0.01158,1.82253,5.0,,


In [93]:
df1.dropna(how = "any")   # By default, it operates on rows (axis = 0)
# how="any": This specifies that rows (or columns) will be removed if any of the values in that row (or column) are missing. 
# If at least one NaN or None is present, the entire row (or column) is dropped.

Unnamed: 0,A,B,C,D,E,F


In [94]:
# if you want to apply it to columns instead, you can add axis=1
df1.dropna(how = "any", axis = 1)

Unnamed: 0,A,B,C,D
2024-10-15,0.0,0.0,-1.266926,5.0
2024-10-16,-0.508918,0.134144,0.472175,5.0
2024-10-17,0.59004,-0.061607,0.125571,5.0
2024-10-18,-2.012784,0.01158,1.82253,5.0


In [95]:
df1

Unnamed: 0,A,B,C,D,E,F
2024-10-15,0.0,0.0,-1.266926,5.0,1.0,
2024-10-16,-0.508918,0.134144,0.472175,5.0,1.0,
2024-10-17,0.59004,-0.061607,0.125571,5.0,,
2024-10-18,-2.012784,0.01158,1.82253,5.0,,


In [96]:
df1.fillna(value = 3)

Unnamed: 0,A,B,C,D,E,F
2024-10-15,0.0,0.0,-1.266926,5.0,1.0,3.0
2024-10-16,-0.508918,0.134144,0.472175,5.0,1.0,3.0
2024-10-17,0.59004,-0.061607,0.125571,5.0,3.0,3.0
2024-10-18,-2.012784,0.01158,1.82253,5.0,3.0,3.0


In [97]:
df1

Unnamed: 0,A,B,C,D,E,F
2024-10-15,0.0,0.0,-1.266926,5.0,1.0,
2024-10-16,-0.508918,0.134144,0.472175,5.0,1.0,
2024-10-17,0.59004,-0.061607,0.125571,5.0,,
2024-10-18,-2.012784,0.01158,1.82253,5.0,,


In [98]:
df1.isna()

Unnamed: 0,A,B,C,D,E,F
2024-10-15,False,False,False,False,False,True
2024-10-16,False,False,False,False,False,True
2024-10-17,False,False,False,False,True,True
2024-10-18,False,False,False,False,True,True


In [99]:
pd.isna(df1)

Unnamed: 0,A,B,C,D,E,F
2024-10-15,False,False,False,False,False,True
2024-10-16,False,False,False,False,False,True
2024-10-17,False,False,False,False,True,True
2024-10-18,False,False,False,False,True,True


In [100]:
df

Unnamed: 0,A,B,C,D
2024-10-15,0.0,0.0,-1.266926,5.0
2024-10-16,-0.508918,0.134144,0.472175,5.0
2024-10-17,0.59004,-0.061607,0.125571,5.0
2024-10-18,-2.012784,0.01158,1.82253,5.0
2024-10-19,1.754172,-0.375431,-2.147462,5.0
2024-10-20,-0.295957,-1.286849,-1.289448,5.0
2024-10-21,-0.476906,-1.512358,1.300378,5.0


In [101]:
df.mean()   # By Default, axis = 0

A   -0.135765
B   -0.441503
C   -0.140454
D    5.000000
dtype: float64

In [102]:
df.mean(axis = 0)

A   -0.135765
B   -0.441503
C   -0.140454
D    5.000000
dtype: float64

In [103]:
df.mean(axis = 1)

2024-10-15    0.933269
2024-10-16    1.274350
2024-10-17    1.413501
2024-10-18    1.205331
2024-10-19    1.057820
2024-10-20    0.531936
2024-10-21    1.077778
Freq: D, dtype: float64

In [104]:
s = pd.Series([1, 2 , 3, np.nan, 4, 5, "Malihe"], index = dates)

In [105]:
s

2024-10-15         1
2024-10-16         2
2024-10-17         3
2024-10-18       NaN
2024-10-19         4
2024-10-20         5
2024-10-21    Malihe
Freq: D, dtype: object

In [106]:
s1 = s.shift(2)

In [107]:
s1

2024-10-15    None
2024-10-16    None
2024-10-17       1
2024-10-18       2
2024-10-19       3
2024-10-20     NaN
2024-10-21       4
Freq: D, dtype: object

In [108]:
s1['2024-10-17']

1

In [109]:
type(s1['2024-10-20'])

float

In [110]:
type(s1["2024-10-15"])

NoneType

In [111]:
type(s1["2024-10-21"])

int

In [112]:
type(s)

pandas.core.series.Series

In [113]:
df

Unnamed: 0,A,B,C,D
2024-10-15,0.0,0.0,-1.266926,5.0
2024-10-16,-0.508918,0.134144,0.472175,5.0
2024-10-17,0.59004,-0.061607,0.125571,5.0
2024-10-18,-2.012784,0.01158,1.82253,5.0
2024-10-19,1.754172,-0.375431,-2.147462,5.0
2024-10-20,-0.295957,-1.286849,-1.289448,5.0
2024-10-21,-0.476906,-1.512358,1.300378,5.0


### User defined functions

In [114]:
df.agg(lambda x : np.mean(x))

A   -0.135765
B   -0.441503
C   -0.140454
D    5.000000
dtype: float64

In [115]:
df.agg(lambda x : np.mean(x) * 5.6)

A    -0.760283
B    -2.472418
C    -0.786545
D    28.000000
dtype: float64

In [116]:
df.transform(lambda x : x * 101.2)

Unnamed: 0,A,B,C,D
2024-10-15,0.0,0.0,-128.212885,506.0
2024-10-16,-51.502543,13.575336,47.784152,506.0
2024-10-17,59.712053,-6.234678,12.70778,506.0
2024-10-18,-203.693734,1.17188,184.440031,506.0
2024-10-19,177.522232,-37.993587,-217.323106,506.0
2024-10-20,-29.950859,-130.229132,-130.492186,506.0
2024-10-21,-48.26289,-153.050672,131.598255,506.0


In [117]:
df.agg(lambda x : x * 101.2)

Unnamed: 0,A,B,C,D
2024-10-15,0.0,0.0,-128.212885,506.0
2024-10-16,-51.502543,13.575336,47.784152,506.0
2024-10-17,59.712053,-6.234678,12.70778,506.0
2024-10-18,-203.693734,1.17188,184.440031,506.0
2024-10-19,177.522232,-37.993587,-217.323106,506.0
2024-10-20,-29.950859,-130.229132,-130.492186,506.0
2024-10-21,-48.26289,-153.050672,131.598255,506.0


In [118]:
df.agg(['sum', 'mean'], axis = 1)

Unnamed: 0,sum,mean
2024-10-15,3.733074,0.933269
2024-10-16,5.097401,1.27435
2024-10-17,5.654004,1.413501
2024-10-18,4.821326,1.205331
2024-10-19,4.23128,1.05782
2024-10-20,2.127745,0.531936
2024-10-21,4.311114,1.077778


In [119]:
df.transform(lambda x : x + 10)

Unnamed: 0,A,B,C,D
2024-10-15,10.0,10.0,8.733074,15.0
2024-10-16,9.491082,10.134144,10.472175,15.0
2024-10-17,10.59004,9.938393,10.125571,15.0
2024-10-18,7.987216,10.01158,11.82253,15.0
2024-10-19,11.754172,9.624569,7.852538,15.0
2024-10-20,9.704043,8.713151,8.710552,15.0
2024-10-21,9.523094,8.487642,11.300378,15.0


In [120]:
s = pd.Series(np.random.randint(0, 7, size = 10))
s

0    1
1    2
2    6
3    3
4    1
5    2
6    0
7    0
8    3
9    0
dtype: int32

In [121]:
s.value_counts()

0    3
1    2
2    2
3    2
6    1
Name: count, dtype: int64

In [122]:
s = pd.Series(["A", "B", "C", "Aaba", "Baca", np.nan, "CABA", "dog", "cat"])

In [123]:
s.str.lower()

0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object

In [124]:
df = pd.DataFrame(np.random.randn(10, 4))
df

Unnamed: 0,0,1,2,3
0,0.38758,-0.924829,-0.939689,1.104042
1,-1.205748,0.954757,-1.225098,0.451506
2,0.495104,-0.630885,0.282087,0.375195
3,-0.1918,-1.214893,-1.010972,0.633715
4,1.058585,2.592693,0.544061,-0.389773
5,0.418606,-1.028078,-1.247698,-0.012559
6,0.019153,2.329312,-1.138677,0.085453
7,-0.586353,0.381554,-1.091102,0.280452
8,-0.15587,0.252613,-0.120166,2.021718
9,0.308568,1.667844,0.682308,0.619363


In [125]:
pieces = [df[ : 3], df[3 : 7], df[7 : ]]
pieces

[          0         1         2         3
 0  0.387580 -0.924829 -0.939689  1.104042
 1 -1.205748  0.954757 -1.225098  0.451506
 2  0.495104 -0.630885  0.282087  0.375195,
           0         1         2         3
 3 -0.191800 -1.214893 -1.010972  0.633715
 4  1.058585  2.592693  0.544061 -0.389773
 5  0.418606 -1.028078 -1.247698 -0.012559
 6  0.019153  2.329312 -1.138677  0.085453,
           0         1         2         3
 7 -0.586353  0.381554 -1.091102  0.280452
 8 -0.155870  0.252613 -0.120166  2.021718
 9  0.308568  1.667844  0.682308  0.619363]

In [126]:
conc = pd.concat(pieces)
conc

Unnamed: 0,0,1,2,3
0,0.38758,-0.924829,-0.939689,1.104042
1,-1.205748,0.954757,-1.225098,0.451506
2,0.495104,-0.630885,0.282087,0.375195
3,-0.1918,-1.214893,-1.010972,0.633715
4,1.058585,2.592693,0.544061,-0.389773
5,0.418606,-1.028078,-1.247698,-0.012559
6,0.019153,2.329312,-1.138677,0.085453
7,-0.586353,0.381554,-1.091102,0.280452
8,-0.15587,0.252613,-0.120166,2.021718
9,0.308568,1.667844,0.682308,0.619363


In [127]:
type(pieces)

list

In [128]:
type(conc)

pandas.core.frame.DataFrame

In [129]:
left1 = pd.DataFrame({"key" : ["foo", "foo"], "lval" : [1, 2]})
left1

Unnamed: 0,key,lval
0,foo,1
1,foo,2


In [130]:
right1 = pd.DataFrame({"key" : ["foo", "foo"], "rval" : [3, 4]})
right1

Unnamed: 0,key,rval
0,foo,3
1,foo,4


In [131]:
pd.merge(left1, right1, on = "key")

Unnamed: 0,key,lval,rval
0,foo,1,3
1,foo,1,4
2,foo,2,3
3,foo,2,4


In [132]:
pd.merge(left1, right1)

Unnamed: 0,key,lval,rval
0,foo,1,3
1,foo,1,4
2,foo,2,3
3,foo,2,4


In [133]:
left2 = pd.DataFrame({"key" : ["foo", "bar"], "lval" : [1, 2]})
left2

Unnamed: 0,key,lval
0,foo,1
1,bar,2


In [134]:
right2 = pd.DataFrame({"key" : ["foo", "bar"], "rval" : [3, 4]})
right2

Unnamed: 0,key,rval
0,foo,3
1,bar,4


In [135]:
pd.merge(left2, right2, on = "key")

Unnamed: 0,key,lval,rval
0,foo,1,3
1,bar,2,4


In [136]:
pd.merge(left2, right2)

Unnamed: 0,key,lval,rval
0,foo,1,3
1,bar,2,4


In [137]:
df = pd.DataFrame(
    {
        "A" : ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
        "B" : ["one", "one", "two", "three", "two", "two", "one", "three"],
        "C" : np.random.randn(8),
        "D" : np.random.randn(8)
    }   
)
df

Unnamed: 0,A,B,C,D
0,foo,one,1.814603,1.721097
1,bar,one,-0.396406,-0.141689
2,foo,two,-0.170888,-0.938966
3,bar,three,-0.499496,-0.579296
4,foo,two,0.196049,0.006398
5,bar,two,0.122406,-0.171564
6,foo,one,1.248965,0.29412
7,foo,three,0.344506,0.010921


In [138]:
df.groupby("A")[["C", "D"]].sum()


Unnamed: 0_level_0,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1
bar,-0.773495,-0.892549
foo,3.433235,1.09357


In [139]:
df.groupby(["A", "B"])[["C"]].sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,C
A,B,Unnamed: 2_level_1
bar,one,-0.396406
bar,three,-0.499496
bar,two,0.122406
foo,one,3.063568
foo,three,0.344506
foo,two,0.025162


In [140]:
arrays = [
    ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
    ["one", "two", "one", "two", "one", "two", "one", "two"]
]
arrays

[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
 ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]

In [141]:
len(arrays)

2

In [142]:
type(arrays)

list

In [143]:
index = pd.MultiIndex.from_arrays(arrays, names = ["first", "second"])
index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [144]:
df = pd.DataFrame(np.random.randn(8, 3), index = index, columns = ["A", "B", "C"])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,0.988268,-0.61291,-1.230822
bar,two,-0.025548,1.826282,-0.088599
baz,one,-0.94995,-0.408047,1.362447
baz,two,0.395107,0.095277,-0.707378
foo,one,-0.884275,0.323158,-0.33465
foo,two,-1.180846,-1.092718,-0.218671
qux,one,1.889797,-0.864348,-2.34789
qux,two,-2.39996,0.96785,-0.552458


In [145]:
stacked = df.stack(future_stack = True)
stacked

first  second   
bar    one     A    0.988268
               B   -0.612910
               C   -1.230822
       two     A   -0.025548
               B    1.826282
               C   -0.088599
baz    one     A   -0.949950
               B   -0.408047
               C    1.362447
       two     A    0.395107
               B    0.095277
               C   -0.707378
foo    one     A   -0.884275
               B    0.323158
               C   -0.334650
       two     A   -1.180846
               B   -1.092718
               C   -0.218671
qux    one     A    1.889797
               B   -0.864348
               C   -2.347890
       two     A   -2.399960
               B    0.967850
               C   -0.552458
dtype: float64

In [146]:
stacked.shape

(24,)

In [147]:
stacked.iloc[0 : 7]

first  second   
bar    one     A    0.988268
               B   -0.612910
               C   -1.230822
       two     A   -0.025548
               B    1.826282
               C   -0.088599
baz    one     A   -0.949950
dtype: float64

In [148]:
stacked.iloc[0]

0.9882677859495327

In [149]:
stacked.iloc[6]

-0.9499503202265892

In [150]:
stacked["bar"]

second   
one     A    0.988268
        B   -0.612910
        C   -1.230822
two     A   -0.025548
        B    1.826282
        C   -0.088599
dtype: float64

In [151]:
stacked["bar"]["two"]

A   -0.025548
B    1.826282
C   -0.088599
dtype: float64

In [152]:
stacked

first  second   
bar    one     A    0.988268
               B   -0.612910
               C   -1.230822
       two     A   -0.025548
               B    1.826282
               C   -0.088599
baz    one     A   -0.949950
               B   -0.408047
               C    1.362447
       two     A    0.395107
               B    0.095277
               C   -0.707378
foo    one     A   -0.884275
               B    0.323158
               C   -0.334650
       two     A   -1.180846
               B   -1.092718
               C   -0.218671
qux    one     A    1.889797
               B   -0.864348
               C   -2.347890
       two     A   -2.399960
               B    0.967850
               C   -0.552458
dtype: float64

In [153]:
unstacked = stacked.unstack()
unstacked

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,0.988268,-0.61291,-1.230822
bar,two,-0.025548,1.826282,-0.088599
baz,one,-0.94995,-0.408047,1.362447
baz,two,0.395107,0.095277,-0.707378
foo,one,-0.884275,0.323158,-0.33465
foo,two,-1.180846,-1.092718,-0.218671
qux,one,1.889797,-0.864348,-2.34789
qux,two,-2.39996,0.96785,-0.552458


In [154]:
unstacked1 = stacked.unstack(1)
unstacked1

Unnamed: 0_level_0,second,one,two
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,A,0.988268,-0.025548
bar,B,-0.61291,1.826282
bar,C,-1.230822,-0.088599
baz,A,-0.94995,0.395107
baz,B,-0.408047,0.095277
baz,C,1.362447,-0.707378
foo,A,-0.884275,-1.180846
foo,B,0.323158,-1.092718
foo,C,-0.33465,-0.218671
qux,A,1.889797,-2.39996


In [155]:
unstacked0 = stacked.unstack(0)
unstacked0

Unnamed: 0_level_0,first,bar,baz,foo,qux
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,A,0.988268,-0.94995,-0.884275,1.889797
one,B,-0.61291,-0.408047,0.323158,-0.864348
one,C,-1.230822,1.362447,-0.33465,-2.34789
two,A,-0.025548,0.395107,-1.180846,-2.39996
two,B,1.826282,0.095277,-1.092718,0.96785
two,C,-0.088599,-0.707378,-0.218671,-0.552458


In [156]:
df = pd.DataFrame(
    {
        "A" : ["one", "one", "two", "three"] * 3,
        "B" : ["A", "B", "C"] * 4,
        "C" : ["foo", "foo", "foo", "bar", "bar", "bar"] * 2,
        "D" : np.random.randn(12),
        "E" : np.random.randn(12)
    }
)
df

Unnamed: 0,A,B,C,D,E
0,one,A,foo,0.608358,0.648707
1,one,B,foo,-2.147429,-1.029367
2,two,C,foo,-0.107501,0.408963
3,three,A,bar,0.85489,1.396889
4,one,B,bar,-0.161653,2.788226
5,one,C,bar,1.184131,-1.216532
6,two,A,foo,0.424723,-1.482006
7,three,B,foo,-0.177372,-0.107217
8,one,C,foo,-1.400107,0.770085
9,one,A,bar,0.058146,0.850659


In [157]:
pt = pd.pivot_table(df, values = "D", index = ["A", "B"], columns = ["C"])
pt

Unnamed: 0_level_0,C,bar,foo
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,0.058146,0.608358
one,B,-0.161653,-2.147429
one,C,1.184131,-1.400107
three,A,0.85489,
three,B,,-0.177372
three,C,0.260143,
two,A,,0.424723
two,B,1.476599,
two,C,,-0.107501


In [158]:
rng = pd.date_range("1/1/2024", periods = 100, freq = "s")
rng

DatetimeIndex(['2024-01-01 00:00:00', '2024-01-01 00:00:01',
               '2024-01-01 00:00:02', '2024-01-01 00:00:03',
               '2024-01-01 00:00:04', '2024-01-01 00:00:05',
               '2024-01-01 00:00:06', '2024-01-01 00:00:07',
               '2024-01-01 00:00:08', '2024-01-01 00:00:09',
               '2024-01-01 00:00:10', '2024-01-01 00:00:11',
               '2024-01-01 00:00:12', '2024-01-01 00:00:13',
               '2024-01-01 00:00:14', '2024-01-01 00:00:15',
               '2024-01-01 00:00:16', '2024-01-01 00:00:17',
               '2024-01-01 00:00:18', '2024-01-01 00:00:19',
               '2024-01-01 00:00:20', '2024-01-01 00:00:21',
               '2024-01-01 00:00:22', '2024-01-01 00:00:23',
               '2024-01-01 00:00:24', '2024-01-01 00:00:25',
               '2024-01-01 00:00:26', '2024-01-01 00:00:27',
               '2024-01-01 00:00:28', '2024-01-01 00:00:29',
               '2024-01-01 00:00:30', '2024-01-01 00:00:31',
               '2024-01-

In [159]:
type(rng)

pandas.core.indexes.datetimes.DatetimeIndex

In [160]:
ts = pd.Series(np.random.randint(0, 500, len(rng)), rng)
ts

2024-01-01 00:00:00    458
2024-01-01 00:00:01    263
2024-01-01 00:00:02    177
2024-01-01 00:00:03    313
2024-01-01 00:00:04     11
                      ... 
2024-01-01 00:01:35     11
2024-01-01 00:01:36    187
2024-01-01 00:01:37    494
2024-01-01 00:01:38    282
2024-01-01 00:01:39    155
Freq: s, Length: 100, dtype: int32

In [161]:
ts.resample("5Min").sum()

2024-01-01    24720
Freq: 5min, dtype: int32

In [162]:
rng2 = pd.date_range("3/6/2024 00:00", periods = 5, freq = "D")
rng2

DatetimeIndex(['2024-03-06', '2024-03-07', '2024-03-08', '2024-03-09',
               '2024-03-10'],
              dtype='datetime64[ns]', freq='D')

In [163]:
ts = pd.Series(np.random.randn(len(rng2)), rng2)
ts

2024-03-06   -0.421065
2024-03-07    0.664841
2024-03-08   -0.376946
2024-03-09    0.959321
2024-03-10    0.220258
Freq: D, dtype: float64

In [164]:
ts_utc = ts.tz_localize("UTC")
ts_utc

2024-03-06 00:00:00+00:00   -0.421065
2024-03-07 00:00:00+00:00    0.664841
2024-03-08 00:00:00+00:00   -0.376946
2024-03-09 00:00:00+00:00    0.959321
2024-03-10 00:00:00+00:00    0.220258
Freq: D, dtype: float64

In [165]:
ts_utc.tz_convert("US/Eastern")

2024-03-05 19:00:00-05:00   -0.421065
2024-03-06 19:00:00-05:00    0.664841
2024-03-07 19:00:00-05:00   -0.376946
2024-03-08 19:00:00-05:00    0.959321
2024-03-09 19:00:00-05:00    0.220258
Freq: D, dtype: float64

In [166]:
rng2

DatetimeIndex(['2024-03-06', '2024-03-07', '2024-03-08', '2024-03-09',
               '2024-03-10'],
              dtype='datetime64[ns]', freq='D')

In [174]:
rng2 + pd.offsets.BusinessDay(5)

DatetimeIndex(['2024-03-13', '2024-03-14', '2024-03-15', '2024-03-15',
               '2024-03-15'],
              dtype='datetime64[ns]', freq=None)

In [176]:
df = pd.DataFrame(
    {"id" : [1, 2, 3, 4, 5, 6],
     "raw_grade" : ["a", "b", "b", "a", "a", "e"]
    }
)
df

Unnamed: 0,id,raw_grade
0,1,a
1,2,b
2,3,b
3,4,a
4,5,a
5,6,e


In [177]:
df["grade"] = df["raw_grade"].astype("category")
df["grade"]

0    a
1    b
2    b
3    a
4    a
5    e
Name: grade, dtype: category
Categories (3, object): ['a', 'b', 'e']

In [178]:
new_categories = ["very good", "good", "very bad"]
df["grade"] = df["grade"].cat.rename_categories(new_categories)
df["grade"]

0    very good
1         good
2         good
3    very good
4    very good
5     very bad
Name: grade, dtype: category
Categories (3, object): ['very good', 'good', 'very bad']

In [179]:
df

Unnamed: 0,id,raw_grade,grade
0,1,a,very good
1,2,b,good
2,3,b,good
3,4,a,very good
4,5,a,very good
5,6,e,very bad


In [180]:
type(df["grade"])

pandas.core.series.Series

In [182]:
df["grade"] = df["grade"].cat.set_categories(
    ["very bad", "bad" ,"medium", "good", "very good"]
)
df["grade"]

0    very good
1         good
2         good
3    very good
4    very good
5     very bad
Name: grade, dtype: category
Categories (5, object): ['very bad', 'bad', 'medium', 'good', 'very good']

In [183]:
df

Unnamed: 0,id,raw_grade,grade
0,1,a,very good
1,2,b,good
2,3,b,good
3,4,a,very good
4,5,a,very good
5,6,e,very bad


In [184]:
df.sort_values(by = "grade")

Unnamed: 0,id,raw_grade,grade
5,6,e,very bad
1,2,b,good
2,3,b,good
0,1,a,very good
3,4,a,very good
4,5,a,very good


In [188]:
df.groupby("grade", observed = False).size()

grade
very bad     1
bad          0
medium       0
good         2
very good    3
dtype: int64

In [187]:
df.groupby("grade", observed = True).size()

grade
very bad     1
good         2
very good    3
dtype: int64

## Read , Write and Store Tabular Data

In [192]:
df.to_csv("mycsv.csv")

In [193]:
mycsv = pd.read_csv("mycsv.csv")

In [194]:
mycsv

Unnamed: 0.1,Unnamed: 0,id,raw_grade,grade
0,0,1,a,very good
1,1,2,b,good
2,2,3,b,good
3,3,4,a,very good
4,4,5,a,very good
5,5,6,e,very bad


In [195]:
df

Unnamed: 0,id,raw_grade,grade
0,1,a,very good
1,2,b,good
2,3,b,good
3,4,a,very good
4,5,a,very good
5,6,e,very bad


In [196]:
df.to_excel("myexcel.xlsx")

In [198]:
myexcel = pd.read_excel("myexcel.xlsx")

In [199]:
myexcel

Unnamed: 0.1,Unnamed: 0,id,raw_grade,grade
0,0,1,a,very good
1,1,2,b,good
2,2,3,b,good
3,3,4,a,very good
4,4,5,a,very good
5,5,6,e,very bad


In [200]:
type(mycsv)

pandas.core.frame.DataFrame

In [201]:
type(myexcel)

pandas.core.frame.DataFrame