# Operations


In [1]:
import pandas as pd

In [3]:
# Creating a sample DataFrame

data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'Alice', 'Bob', 'Charlie', 'David'],
    'Age': [25, 30, 35, 25, 30, 35, 40],
    'Salary': [50000, 60000, 70000, 50000, 60000, 70000, 80000]
}
df = pd.DataFrame(data)

In [5]:
df.head()

Unnamed: 0,Name,Age,Salary
0,Alice,25,50000
1,Bob,30,60000
2,Charlie,35,70000
3,Alice,25,50000
4,Bob,30,60000


## Unique Values
Pandas allows you to find unique values and count occurrences in a column.

In [8]:
# Finding unique values in 'Name' column
unique_names = df['Name'].unique()

In [10]:
unique_names

array(['Alice', 'Bob', 'Charlie', 'David'], dtype=object)

### Counting unique values

In [13]:
display(df['Name'].nunique())

4

### Value Counts
Counting occurrences of each value in 'Name' column (each unique value)

In [16]:
name_counts = df['Name'].value_counts()

In [18]:
name_counts

Name
Alice      2
Bob        2
Charlie    2
David      1
Name: count, dtype: int64

In [20]:
display(unique_names, name_counts)

array(['Alice', 'Bob', 'Charlie', 'David'], dtype=object)

Name
Alice      2
Bob        2
Charlie    2
David      1
Name: count, dtype: int64

### Finding the number of unique values: 
you could only check the length of the array


In [23]:
df['Name'].apply(len)

0    5
1    3
2    7
3    5
4    3
5    7
6    5
Name: Name, dtype: int64

## Selecting Data
We can select specific rows and columns from the DataFrame.

### Selecting a single column

In [27]:
display(df['Age'])

0    25
1    30
2    35
3    25
4    30
5    35
6    40
Name: Age, dtype: int64

### Selecting multiple columns

In [30]:
display(df[['Name', 'Salary']])

Unnamed: 0,Name,Salary
0,Alice,50000
1,Bob,60000
2,Charlie,70000
3,Alice,50000
4,Bob,60000
5,Charlie,70000
6,David,80000


### Selecting a specific row using `.iloc`

In [33]:
df_tmp = df.iloc[2]

In [35]:
df_tmp

Name      Charlie
Age            35
Salary      70000
Name: 2, dtype: object

In [37]:
# Convvert the result to dataframe
df_tmp.to_frame()

Unnamed: 0,2
Name,Charlie
Age,35
Salary,70000


## Filtering Data
Filtering helps in selecting specific rows based on conditions.

In [69]:
# Selecting rows where Age is greater than 30
display(df[df['Age'] > 30])

Unnamed: 0,Name,Age,Salary
2,Charlie,35,70000
5,Charlie,35,70000
6,David,40,80000


In [72]:
df[df['Salary']>50000]

Unnamed: 0,Name,Age,Salary
1,Bob,30,60000
2,Charlie,35,70000
4,Bob,30,60000
5,Charlie,35,70000
6,David,40,80000


In [74]:
df['Salary']>50000

0    False
1     True
2     True
3    False
4     True
5     True
6     True
Name: Salary, dtype: bool

### Getting columns and index names

In [43]:
df.columns

Index(['Name', 'Age', 'Salary'], dtype='object')

In [45]:
df.index

RangeIndex(start=0, stop=7, step=1)

In [49]:
df.sort_values(by='Salary')

Unnamed: 0,Name,Age,Salary
0,Alice,25,50000
3,Alice,25,50000
1,Bob,30,60000
4,Bob,30,60000
2,Charlie,35,70000
5,Charlie,35,70000
6,David,40,80000


### Look for Null values

In [52]:
df.isnull()

Unnamed: 0,Name,Age,Salary
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False
4,False,False,False
5,False,False,False
6,False,False,False


In [78]:
nan_data={'NAME':['SAM','BLAIKE',None,'FRED'],
          'AGE':[20,30,45,None]
             }

In [80]:
df_nan=pd.DataFrame(data=nan_data)

In [82]:
nan_data

{'NAME': ['SAM', 'BLAIKE', None, 'FRED'], 'AGE': [20, 30, 45, None]}

In [84]:
df_nan

Unnamed: 0,NAME,AGE
0,SAM,20.0
1,BLAIKE,30.0
2,,45.0
3,FRED,


In [89]:
df_nan.isnull()

Unnamed: 0,NAME,AGE
0,False,False
1,False,False
2,True,False
3,False,True


In [91]:
df_nan.dropna(inplace=True)

In [93]:
df_nan

Unnamed: 0,NAME,AGE
0,SAM,20.0
1,BLAIKE,30.0


### Pivot table
A pivot table is a way of summarizing data by reorganizing it. It allows us to aggregate values based on specified index and column criteria.


In [97]:
pivot_data={'NAME':['SAM','BLAIKE',None,'FRED'],
          'AGE':[20,30,45,None],
            'C':['x','y','x','y']}

In [101]:
df_pivot=pd.DataFrame(data=pivot_data)

In [None]:
df_pivot.pivot_table()