# **1. Pandas.melt()**

`melt()` reshapes a DataFrame from wide format (one column per variable) to long format (one row per observation).

In [1]:
import pandas as pd

# Wide-format sample: one row per student, one column per subject.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Math=[85, 92, 78],
    Science=[90, 88, 95],
)
df = pd.DataFrame(data=data)
df  # last expression of the cell, so it is rendered as the cell output

Unnamed: 0,Name,Math,Science
0,Alice,85,90
1,Bob,92,88
2,Charlie,78,95


In [2]:
# Unpivot every column except 'Name': each (student, subject) pair becomes
# one row, with the former column header stored in 'Subject' and the cell
# value stored in 'Score'.
melt = pd.melt(
    frame=df,
    id_vars=['Name'],
    var_name='Subject',
    value_name='Score',
)
melt

Unnamed: 0,Name,Subject,Score
0,Alice,Math,85
1,Bob,Math,92
2,Charlie,Math,78
3,Alice,Science,90
4,Bob,Science,88
5,Charlie,Science,95


# **2. Pandas.pivot() and Pandas.pivot_table()**
The pivot() function reshapes data from long to wide format: you specify which column supplies the row index, which column's values become the new column headers, and which column fills the cells.

The pivot_table() function is used for summarizing and aggregating data. It allows us to compute statistical summaries, such as mean, sum, or count, across different categories, making it useful for data analysis and reporting.

In [9]:
# Long -> wide again: one row per 'Name', one column per 'Subject', cells
# taken from 'Score'. pivot() only reshapes; it performs no aggregation.
mypivot = melt.pivot(
    index='Name',
    columns='Subject',
    values='Score',
)
mypivot

Subject,Math,Science
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alice,85,90
Bob,92,88
Charlie,78,95


In [15]:
# Same layout as the pivot() cell, but every (Name, Subject) group is
# aggregated with the mean. Each group holds a single score here, so the
# values match the raw scores and are displayed as floats.
mpt = melt.pivot_table(
    values='Score',
    index='Name',
    columns='Subject',
    aggfunc='mean',
)
mpt

Subject,Math,Science
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alice,85.0,90.0
Bob,92.0,88.0
Charlie,78.0,95.0


**pandas.crosstab() and pandas.cut()**

In [16]:
# Load the placement dataset used by the crosstab() and cut() examples.
# NOTE(review): '/content/...' is an absolute path (presumably a Colab upload
# location) -- adjust it when running the notebook elsewhere.
pdf = pd.read_csv(filepath_or_buffer='/content/Placement.csv')
pdf

Unnamed: 0,sl_no,gender,ssc_p,ssc_b,hsc_p,hsc_b,hsc_s,degree_p,degree_t,workex,etest_p,specialisation,mba_p,status,salary
0,1,M,67.00,Others,91.00,Others,Commerce,58.00,Sci&Tech,No,55.0,Mkt&HR,58.80,Placed,270000.0
1,2,M,79.33,Central,78.33,Others,Science,77.48,Sci&Tech,Yes,86.5,Mkt&Fin,66.28,Placed,200000.0
2,3,M,65.00,Central,68.00,Central,Arts,64.00,Comm&Mgmt,No,75.0,Mkt&Fin,57.80,Placed,250000.0
3,4,M,56.00,Central,52.00,Central,Science,52.00,Sci&Tech,No,66.0,Mkt&HR,59.43,Not Placed,
4,5,M,85.80,Central,73.60,Central,Commerce,73.30,Comm&Mgmt,No,96.8,Mkt&Fin,55.50,Placed,425000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
210,211,M,80.60,Others,82.00,Others,Commerce,77.60,Comm&Mgmt,No,91.0,Mkt&Fin,74.49,Placed,400000.0
211,212,M,58.00,Others,60.00,Others,Science,72.00,Sci&Tech,No,74.0,Mkt&Fin,53.62,Placed,275000.0
212,213,M,67.00,Others,67.00,Others,Commerce,73.00,Comm&Mgmt,Yes,59.0,Mkt&Fin,69.72,Placed,295000.0
213,214,F,74.00,Others,66.00,Others,Commerce,58.00,Comm&Mgmt,No,70.0,Mkt&HR,60.23,Placed,204000.0


In [19]:
# Contingency table: number of students for every (gender, specialisation)
# combination. The complete table is stored in `cross`; chaining .head()
# onto the crosstab would silently drop rows as soon as the row variable
# has more than five distinct levels.
cross = pd.crosstab(index=pdf['gender'], columns=pdf['specialisation'])
cross

specialisation,Mkt&Fin,Mkt&HR
gender,Unnamed: 1_level_1,Unnamed: 2_level_1
F,37,39
M,83,56


In [22]:
# Bin the 'ssc_p' percentages into right-closed intervals:
# (0, 60], (60, 70], (70, 80], (80, 90], (90, 100].
mcut = pd.cut(
    x=pdf['ssc_p'],
    bins=[0, 60, 70, 80, 90, 100],
)
mcut.head()  # preview only the first five binned values

Unnamed: 0,ssc_p
0,"(60, 70]"
1,"(70, 80]"
2,"(60, 70]"
3,"(0, 60]"
4,"(80, 90]"


**1. pd.concat() Function**
concat() is used to combine two or more DataFrames along a particular axis (row-wise or column-wise).


concat(): Stacks DataFrames along rows or columns without considering key columns.

merge(): Combines DataFrames based on common keys, similar to SQL joins.

merge_ordered(): Merges while maintaining order and optionally filling gaps in data.


In [24]:
import pandas as pd

# Two small frames that share the same columns ('Name', 'Score')
df4 = pd.DataFrame([['Alice', 85], ['Bob', 92]], columns=['Name', 'Score'])
df6 = pd.DataFrame([['Charlie', 78], ['David', 88]], columns=['Name', 'Score'])

# Stack df6 on top of df4 (row-wise). Each frame keeps its own index labels,
# so the labels 0 and 1 both appear twice in the result.
concat_result = pd.concat(objs=[df6, df4], axis='index')
print(concat_result)


      Name  Score
0  Charlie     78
1    David     88
0    Alice     85
1      Bob     92


# **2. pd.merge() Function**
merge() is used to combine two DataFrames based on a common column(s) or index, similar to SQL joins.

In [25]:
# Left frame: which course each student takes.
df10 = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Course': ['Math', 'Science', 'Math'],
})
# Right frame: which grade each student received.
df20 = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'David'],
    'Grade': ['A', 'B', 'A'],
})

# Inner join on 'Name': only names present in BOTH frames survive
# (Alice and Bob); Charlie and David are dropped.
merge_result = pd.merge(left=df10, right=df20, how='inner', on='Name')
print(merge_result)


    Name   Course Grade
0  Alice     Math     A
1    Bob  Science     B


# **3. pd.merge_ordered() Function**
merge_ordered() merges two DataFrames, ensuring order is maintained and optionally filling missing values.

In [26]:
# Ordered merge of the course and grade frames on the 'Name' key.
# Gaps left by the join are forward-filled from the previous row
# (fill_method='ffill'), which is why Charlie shows grade 'B' and David
# shows course 'Math' in the output.
merge_ordered_result = pd.merge_ordered(
    left=df10,
    right=df20,
    on='Name',
    fill_method='ffill',
)
print(merge_ordered_result)

      Name   Course Grade
0    Alice     Math     A
1      Bob  Science     B
2  Charlie     Math     B
3    David     Math     A


pd.factorize() encodes the categorical values into numerical labels. Here, 'apple' is encoded as 0, 'orange' as 1, and 'banana' as 2.

In [28]:
import pandas as pd

# Sample data: a plain Python list of category labels
datas = ['apple', 'orange', 'apple', 'banana', 'orange', 'apple']

# pd.factorize() expects an array-like (Series, Index, ExtensionArray or
# np.ndarray); handing it a bare list triggers a deprecation FutureWarning on
# recent pandas. Converting through a Series and .to_numpy() passes an ndarray,
# so `unique` is still returned as a NumPy array and prints exactly as before.
labels, unique = pd.factorize(pd.Series(datas).to_numpy())

# labels: integer code for each element, numbered in order of first appearance
# unique: the distinct values, positioned so that unique[code] is the value
print("Labels:", labels)
print("Unique Values:", unique)


Labels: [0 1 0 2 1 0]
Unique Values: ['apple' 'orange' 'banana']


  labels, unique = pd.factorize(datas)


pd.factorize(): Encodes categorical data into numeric labels.

pd.eval(): Evaluates string expressions on pandas objects for faster computation.

pd.unique(): Returns unique values from a Series or DataFrame column.

pd.wide_to_long(): Converts a DataFrame from a wide format to a long format for time series or categorical analysis.

In [32]:
# pd.unique() drops repeated values and keeps the survivors in order of
# first appearance (the result is not sorted).
a = pd.Series(data=[1, 0, 1, 9, 8, 8, 6, 7, 6])
b = pd.unique(values=a)
print(b)

[1 0 9 8 6 7]
