In [1]:
import os

import pandas as pd

# ✅ Step 1: Load the dataset with semicolon separator
# The CSV location can be overridden with the FACEBOOK_DATASET_PATH environment
# variable so the notebook is not tied to one machine's directory layout.
# When the variable is unset, the original local path is used unchanged.
DATA_PATH = os.environ.get(
    'FACEBOOK_DATASET_PATH',
    '/Users/akshay/Desktop/dsbda_practical/DSBDALExam DataSets/DSBDALExam DataSets/dataset_Facebook.csv',
)

# The file is read as ';'-delimited text decoded with the latin1 codec.
df = pd.read_csv(DATA_PATH, sep=';', encoding='latin1')

# ✅ Print basic info: (rows, columns) followed by the list of column names
print("Original Dataset Shape:", df.shape)
print("Columns:", df.columns.tolist())

# ✅ Step 2: Create subsets for each post type
# Every subset is built with the same boolean filter on the 'Type' column.
# If a post type does not occur, the filter simply yields an empty frame that
# still carries all of df's columns, so no membership check is required and
# no column-less pd.DataFrame() placeholder is produced. All four subsets
# therefore share one schema and can be indexed or concatenated uniformly.
photo_posts = df[df['Type'] == 'Photo']
status_posts = df[df['Type'] == 'Status']
video_posts = df[df['Type'] == 'Video']
link_posts = df[df['Type'] == 'Link']

# ✅ Step 3: Stack the Photo and Status subsets row-wise into one frame;
# ignore_index=True renumbers the rows of the result from 0.
merged_posts = pd.concat((photo_posts, status_posts), axis=0, ignore_index=True)

# ✅ Step 4: Order the full dataset by 'Page total likes', largest first
sorted_df = df.sort_values('Page total likes', ascending=False)

# ✅ Step 5: Swap rows and columns of the full dataset
transposed_df = df.transpose()

# ✅ Step 6: Reshape wide -> long.
# 'Type' and 'Category' stay as identifier columns; the 'like', 'share' and
# 'comment' columns are stacked into an ('Interaction Type', 'Count') pair.
melted_df = df.melt(
    id_vars=['Type', 'Category'],
    value_vars=['like', 'share', 'comment'],
    var_name='Interaction Type',
    value_name='Count',
)


# ✅ Step 7: Reshape long -> wide.
# One row per (Type, Category), one column per interaction type, with the
# counts summed inside each cell; reset_index() turns the group keys back
# into ordinary columns.
cast_df = pd.pivot_table(
    melted_df,
    values='Count',
    index=['Type', 'Category'],
    columns='Interaction Type',
    aggfunc='sum',
).reset_index()

# ✅ Optional: Display results
# Each entry pairs a heading with the frame to preview; the loop prints the
# heading followed by the first five rows of that frame.
previews = [
    ("\nPhoto Posts Sample:\n", photo_posts),
    ("\nMerged Posts (Photo + Status):\n", merged_posts),
    ("\nSorted by Page total likes:\n", sorted_df[['Page total likes', 'Type']]),
    ("\nTransposed Data (first 5 rows):\n", transposed_df),
    ("\nMelted Data (long format):\n", melted_df),
    ("\nPivoted Data (wide format):\n", cast_df),
]
for heading, frame in previews:
    print(heading, frame.head())







Original Dataset Shape: (500, 19)
Columns: ['Page total likes', 'Type', 'Category', 'Post Month', 'Post Weekday', 'Post Hour', 'Paid', 'Lifetime Post Total Reach', 'Lifetime Post Total Impressions', 'Lifetime Engaged Users', 'Lifetime Post Consumers', 'Lifetime Post Consumptions', 'Lifetime Post Impressions by people who have liked your Page', 'Lifetime Post reach by people who like your Page', 'Lifetime People who have liked your Page and engaged with your post', 'comment', 'like', 'share', 'Total Interactions']

Photo Posts Sample:
    Page total likes   Type  Category  Post Month  Post Weekday  Post Hour  \
0            139441  Photo         2          12             4          3   
2            139441  Photo         3          12             3          3   
3            139441  Photo         2          12             2         10   
4            139441  Photo         2          12             2          3   
6            139441  Photo         3          12             1          3 

In [2]:
# Show the documentation for pd.melt. The built-in help() replaces the
# IPython-only `pd.melt?` suffix so this cell is also valid plain Python
# (for example after exporting the notebook to a script).
help(pd.melt)

Signature:
pd.melt(
    frame: 'DataFrame',
    id_vars=None,
    value_vars=None,
    var_name=None,
    value_name: 'Hashable' = 'value',
    col_level=None,
    ignore_index: 'bool' = True,
) -> 'DataFrame'
Docstring:
Unpivot a DataFrame from wide to long format, optionally leaving identifiers set.

This function is useful to massage a DataFrame into a format where one
or more columns are identifier variables (`id_vars`), while all other
columns, considered measured variables (`value_vars`), are "unpivoted" to
the row axis, leaving just two non-identifier columns, 'variable' and
'value'.

Parameters
----------
id_vars : scalar, tuple, list, or ndarray, optional
    Column(s) to use as identifier variables.
value_vars : scalar, tuple, list, or ndarray, optional
    Column(s) to unpivot. If not specified, uses al

In [3]:
# Show the documentation for pd.pivot_table. The built-in help() replaces the
# IPython-only `pd.pivot_table?` suffix so this cell is also valid plain
# Python (for example after exporting the notebook to a script).
help(pd.pivot_table)

Signature:
pd.pivot_table(
    data: 'DataFrame',
    values=None,
    index=None,
    columns=None,
    aggfunc: 'AggFuncType' = 'mean',
    fill_value=None,
    margins: 'bool' = False,
    dropna: 'bool' = True,
    margins_name: 'Hashable' = 'All',
    observed: 'bool | lib.NoDefault' = <no_default>,
    sort: 'bool' = True,
) -> 'DataFrame'
Docstring:
Create a spreadsheet-style pivot table as a DataFrame.

The levels in the pivot table will be stored in MultiIndex objects
(hierarchical indexes) on the index and columns of the result DataFrame.

Parameters
----------
data : DataFrame
values : list-like or scalar, optional
    Column or columns to aggregate.
index : column, Grouper, array, or list of the previous
    Key