# Pandas

In [1]:
import pandas as pd
print("Pandas version:", pd.__version__)

Pandas version: 2.3.3


## 1. Series: one-dimensional data

### 1.1. Creating series 

In [2]:
temp = pd.Series([22, 25, 23, 28, 24])
temp

0    22
1    25
2    23
3    28
4    24
dtype: int64

In [3]:
type(temp)

pandas.core.series.Series

In [4]:
day = ['Mon', 'Tues', 'Wed', 'Thurs', 'Fri']
temp = pd.Series([22, 25, 23, 28, 24], index=day)

temp

Mon      22
Tues     25
Wed      23
Thurs    28
Fri      24
dtype: int64

### 1.2. Accessing series data

In [5]:
temp.iloc[2]

np.int64(23)

In [6]:
temp['Wed']

np.int64(23)

### 1.3. Series operations

In [7]:
temp.mean()

np.float64(24.4)

In [8]:
temp.max()

np.int64(28)

## 2. DataFrame: two-dimensional data

### 2.1. Creating dataframes

In [26]:
data = {
    'name': ['Alice', 'Bob', 'Charlie', 'Diana'],
    'age': [25, 30, 35, 28],
    'city': ['New York', 'Paris', 'London', 'Tokyo'],
    'salary': [50000, 60000, 55000, 58000]
}

type(data)

dict

In [58]:
df = pd.DataFrame(data)
df

Unnamed: 0,name,age,city,salary
0,Alice,25,New York,50000
1,Bob,30,Paris,60000
2,Charlie,35,London,55000
3,Diana,28,Tokyo,58000


### 2.2. Setting the index

In [59]:
index = [1, 2, 3, 4]
df.set_index(pd.Index(index), inplace=True)

In [60]:
df

Unnamed: 0,name,age,city,salary
1,Alice,25,New York,50000
2,Bob,30,Paris,60000
3,Charlie,35,London,55000
4,Diana,28,Tokyo,58000


In [61]:
df.reset_index(inplace=True, drop=True)

In [62]:
df

Unnamed: 0,name,age,city,salary
0,Alice,25,New York,50000
1,Bob,30,Paris,60000
2,Charlie,35,London,55000
3,Diana,28,Tokyo,58000


### 2.3. Removing rows or columns

In [None]:
df.drop(2, axis=0, inplace=True)

In [55]:
df.drop(2, axis=0, inplace=True)

In [56]:
df

Unnamed: 0,name,age,city,salary
0,Alice,25,New York,50000
1,Bob,30,Paris,60000


In [66]:
df.head(2)

Unnamed: 0,name,age,city,salary
0,Alice,25,New York,50000
1,Bob,30,Paris,60000


In [67]:
df.tail(2)

Unnamed: 0,name,age,city,salary
2,Charlie,35,London,55000
3,Diana,28,Tokyo,58000


In [63]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   name    4 non-null      object
 1   age     4 non-null      int64 
 2   city    4 non-null      object
 3   salary  4 non-null      int64 
dtypes: int64(2), object(2)
memory usage: 260.0+ bytes


In [64]:
df.describe()

Unnamed: 0,age,salary
count,4.0,4.0
mean,29.5,55750.0
std,4.203173,4349.32945
min,25.0,50000.0
25%,27.25,53750.0
50%,29.0,56500.0
75%,31.25,58500.0
max,35.0,60000.0


In [68]:
df['name']

0      Alice
1        Bob
2    Charlie
3      Diana
Name: name, dtype: object

In [69]:
df[['name', 'city']]

Unnamed: 0,name,city
0,Alice,New York
1,Bob,Paris
2,Charlie,London
3,Diana,Tokyo


In [70]:
index = ['first', 'second', 'third', 'fourth']
df = df.set_index(pd.Index(index))
df

Unnamed: 0,name,age,city,salary
first,Alice,25,New York,50000
second,Bob,30,Paris,60000
third,Charlie,35,London,55000
fourth,Diana,28,Tokyo,58000


In [71]:
df.loc['first']

name         Alice
age             25
city      New York
salary       50000
Name: first, dtype: object

In [74]:
df.loc[['first', 'second']]

Unnamed: 0,name,age,city,salary
first,Alice,25,New York,50000
second,Bob,30,Paris,60000


In [75]:
df.loc[['first', 'second'], ['name', 'salary']]

Unnamed: 0,name,salary
first,Alice,50000
second,Bob,60000


In [76]:
df[df['salary'] > 50000]

Unnamed: 0,name,age,city,salary
second,Bob,30,Paris,60000
third,Charlie,35,London,55000
fourth,Diana,28,Tokyo,58000


In [77]:
df[df['city'] == 'Tokyo']

Unnamed: 0,name,age,city,salary
fourth,Diana,28,Tokyo,58000


In [80]:
salary_thresh = df['salary'] > 50000

In [81]:
df[salary_thresh]

Unnamed: 0,name,age,city,salary
second,Bob,30,Paris,60000
third,Charlie,35,London,55000
fourth,Diana,28,Tokyo,58000


In [82]:
df.columns

Index(['name', 'age', 'city', 'salary'], dtype='object')

In [85]:
rename_dict = {'city': 'location'}

df = df.rename(columns=rename_dict)

In [86]:
df

Unnamed: 0,name,age,location,salary
first,Alice,25,New York,50000
second,Bob,30,Paris,60000
third,Charlie,35,London,55000
fourth,Diana,28,Tokyo,58000
