# Pandas tutorial

# SERIES

In [30]:
import pandas as pd

data = ['Steve', '35', 'Male', '3.5']
series = pd.Series(data, index=['Name', 'Age', 'Gender', 'Rating'])
print(series)

Name      Steve
Age          35
Gender     Male
Rating      3.5
dtype: object


## Pandas Series in Python has three key properties:

![image.png](attachment:image.png)

In [29]:
import pandas as pd  
s = pd.Series([1, 2, 3.5, "text"])  
print(s)  


0       1
1       2
2     3.5
3    text
dtype: object


![image.png](attachment:image.png)

![image.png](attachment:image.png)

In [34]:
s = pd.Series([10, 20, 30])  
print(s)

0    10
1    20
2    30
dtype: int64


In [37]:
s[3] = 40  # This has to give an error because Series size is fixed!
#but it wont, as Pandas extends the Series automatically (not truly immutable) ⚠

In [38]:
print(s)

0    10
1    20
2    30
3    40
dtype: int64


In [43]:
import numpy as np

arr = np.array([1,2,3])
immutable_s=pd.Series(arr)
print(immutable_s)

immutable_s[3]=1000

print(immutable_s)
print(arr)

0    1
1    2
2    3
dtype: int32
0       1
1       2
2       3
3    1000
dtype: int64
[1 2 3]


In [47]:
import numpy as np

arr = np.array([10, 20, 30])
s = pd.Series(arr)
s[3] = 40  # Works, but creates a new object (not modifying the NumPy array)
print(s)


#modifying existing value - yes
s[3] = 40000  
print(s)

0    10
1    20
2    30
3    40
dtype: int64
0       10
1       20
2       30
3    40000
dtype: int64


In [53]:
s.drop(3)
print(s)
#This makes no difference to the original series obj-s

0       10
1       20
2       30
3    40000
dtype: int64


In [49]:
#The row with index "b" is removed, 
#but drop() returns a new Series rather than modifying s in place.
new_s=s.drop(3)
print(s)
print(new_s)

0       10
1       20
2       30
3    40000
dtype: int64
0    10
1    20
2    30
dtype: int64


![image.png](attachment:image.png)

![image.png](attachment:image.png)

In [45]:
s = pd.Series([10, 20, 30])  
s[1] = 50  # Updating value at index 1  
print(s)  


0    10
1    50
2    30
dtype: int64


# PANDAS

![image.png](attachment:image.png)

In [46]:
import pandas as pd

# Data represented as a dictionary
data = {
    'Name': ['Steve', 'Lia', 'Vin', 'Katie'],
    'Age': [32, 28, 45, 38],
    'Gender': ['Male', 'Female', 'Male', 'Female'],
    'Rating': [3.45, 4.6, 3.9, 2.78]
}

# Creating the DataFrame
df = pd.DataFrame(data)

# Display the DataFrame
print(df)

    Name  Age  Gender  Rating
0  Steve   32    Male    3.45
1    Lia   28  Female    4.60
2    Vin   45    Male    3.90
3  Katie   38  Female    2.78


![image.png](attachment:image.png)

![image.png](attachment:image.png)

In [50]:
import pandas as pd  

df = pd.DataFrame({
    "Name": ["Alice", "Bob", "Charlie"],
    "Age": [25, 30, 35],
    "Salary": [50000.5, 60000.75, 70000.0]
})

print(df.dtypes)  


Name       object
Age         int64
Salary    float64
dtype: object


![image.png](attachment:image.png)

In [51]:
df["Department"] = ["HR", "IT", "Finance"]  
print(df)


      Name  Age    Salary Department
0    Alice   25  50000.50         HR
1      Bob   30  60000.75         IT
2  Charlie   35  70000.00    Finance


In [52]:
df.drop(columns=["Salary"], inplace=True)
print(df)

      Name  Age Department
0    Alice   25         HR
1      Bob   30         IT
2  Charlie   35    Finance


In [54]:
new_row = {"Name": "David", "Age": 28, "Salary": 55000.0}
df = df.append(new_row, ignore_index=True)  # Appends a new row
print(df)


      Name  Age Department   Salary
0    Alice   25         HR      NaN
1      Bob   30         IT      NaN
2  Charlie   35    Finance      NaN
3    David   28        NaN  55000.0


  df = df.append(new_row, ignore_index=True)  # Appends a new row


In [56]:
# New row as a DataFrame
new_row = pd.DataFrame([{"Name": "David", "Age": 28, "Salary": 55000.0}])

# Use concat() instead of append()
df = pd.concat([df, new_row], ignore_index=True)
print(df)

      Name  Age Department   Salary
0    Alice   25         HR      NaN
1      Bob   30         IT      NaN
2  Charlie   35    Finance      NaN
3    David   28        NaN  55000.0
4    David   28        NaN  55000.0
5    David   28        NaN  55000.0


![image.png](attachment:image.png)

In [58]:
df.at[1, "Age"] = 32  # Updates Age at index 1
df.loc[2, "Name"] = "Chris"  # Updates Name at index 2
print(df)

    Name  Age Department   Salary
0  Alice   25         HR      NaN
1    Bob   32         IT      NaN
2  Chris   35    Finance      NaN
3  David   28        NaN  55000.0
4  David   28        NaN  55000.0
5  David   28        NaN  55000.0


In [59]:
# Change Age for all people older than 30
df.loc[df["Age"] > 30, "Age"] = 29  
print(df)


    Name  Age Department   Salary
0  Alice   25         HR      NaN
1    Bob   29         IT      NaN
2  Chris   29    Finance      NaN
3  David   28        NaN  55000.0
4  David   28        NaN  55000.0
5  David   28        NaN  55000.0


![image.png](attachment:image.png)

![image.png](attachment:image.png)

# Series and DataFrame Attributes

In [None]:
#1️⃣ dtype – Returns the data type of elements

In [60]:
import pandas as pd

s = pd.Series([1, 2, 3])
print(s.dtype)  # Output: int64


int64


In [66]:
df = pd.DataFrame({"Name": ["Alice", "Bob"], "Age": [25, 30]})
print(df.dtypes)


Name    object
Age      int64
dtype: object


In [None]:
#2️⃣ index – Provides row labels

In [62]:
s = pd.Series([10, 20, 30], index=["a", "b", "c"])
print(s.index)


Index(['a', 'b', 'c'], dtype='object')


In [64]:
df = pd.DataFrame({"Name": ["Alice", "Bob"], "Age": [25, 30]})
print(df.index)#Remember: Row names, not column names


RangeIndex(start=0, stop=2, step=1)


In [None]:
#3️⃣ values – Returns values as a NumPy array

In [65]:
s = pd.Series([10, 20, 30])
print(s.values)  # Output: [10 20 30]


[10 20 30]


In [68]:
df = pd.DataFrame({"Name": ["Alice", "Bob"], "Age": [25, 30]})
print(df)
print(df.values)


    Name  Age
0  Alice   25
1    Bob   30
[['Alice' 25]
 ['Bob' 30]]


In [69]:
#4️⃣ shape – Gives (rows, columns)

In [72]:
df = pd.DataFrame({"Name": ["Alice", "Bob"], "Age": [25, 30],
                   "Dept": ['IT', 'HR']})
print(df.shape)  # Output: (2, 3) (Rows, Cols)

print(df)

(2, 3)
    Name  Age Dept
0  Alice   25   IT
1    Bob   30   HR


In [None]:
#5️⃣ ndim – Number of dimensions