In [3]:
# Reshaping a pandas DataFrame is one of the most common data-wrangling tasks in
# the data analysis world.
# It is also referred to as transposing, or as pivoting/unpivoting a table from
# long to wide format or from wide to long format.
# So what is a long data format versus a wide data format, and how do we reshape
# a DataFrame from long to wide and vice versa?


In [4]:
# Syntax: pandas.Series.values.reshape((dimension))
# Returns: an ndarray holding the values in the requested shape; if the
# requested shape exactly matches the current shape, the data is returned
# unchanged (for compatibility).


In [6]:
# pandas supplies the Series container used below
import pandas as pd

# six integers (the multiples of three from 3 to 18) to wrap in a Series
array = list(range(3, 19, 3))

# build a Series from the list
series_obj = pd.Series(data=array)

# pull the values out of the Series as an array
arr = series_obj.values

# display the extracted array (still one-dimensional at this point)
arr


array([ 3,  6,  9, 12, 15, 18], dtype=int64)

In [7]:
# arrange the six values as 2 rows x 3 columns
reshaped_arr = arr.reshape(2, 3)
# display the two-dimensional result
reshaped_arr

array([[ 3,  6,  9],
       [12, 15, 18]], dtype=int64)

In [8]:
 # import pandas library
import pandas as pd

# six names to wrap in a Series
array = [
    "Adrian", "Solskjær", "Samuel",
    "Palma", "Jake", "Anatalya",
]

# build the Series and print it under a caption
series_obj = pd.Series(array)
print("Given Series is:\n", series_obj)

# pull the values out of the Series as an array and display it
arr = series_obj.values
arr

Given Series is:
 0      Adrian
1    Solskjær
2      Samuel
3       Palma
4        Jake
5    Anatalya
dtype: object


array(['Adrian', 'Solskjær', 'Samuel', 'Palma', 'Jake', 'Anatalya'],
      dtype=object)

In [9]:
# arrange the six names as 3 rows x 2 columns
reshaped_arr = arr.reshape(3, 2)
# print the two-dimensional result under a caption
print("After Reshaping: \n", reshaped_arr)

After Reshaping: 
 [['Adrian' 'Solskjær']
 ['Samuel' 'Palma']
 ['Jake' 'Anatalya']]


In [10]:
# The pivot() function is used to reshape a given DataFrame, organized by the
# given index / column values.
# This function does not support data aggregation; multiple values will result
# in a MultiIndex in the columns.
# Syntax: DataFrame.pivot(self, index=None, columns=None, values=None)
# Parameters: index, columns, values

In [11]:
# The pandas.pivot(index, columns, values) function produces a pivot table based
# on 3 columns of the DataFrame.
# It uses the unique values from index / columns and fills the table with values.
# Parameters:
#   index [ndarray]   : labels to use to make the new frame's index
#   columns [ndarray] : labels to use to make the new frame's columns
#   values [ndarray]  : values to use for populating the new frame's values
# Returns: reshaped DataFrame
# Exception: ValueError is raised if there are any duplicates.


In [12]:
# Build a small three-row example frame
import pandas as pd

# one row per person: a name, a subject and a numeric column C
df = pd.DataFrame(
    dict(
        Name=['Jean Paul Satre', 'Friedrich Nietzsche', 'Salvador Dali'],
        Subject=['Psychology', 'Philosophy', 'Surrealism'],
        C=[30, 25, 24],
    )
)
df

Unnamed: 0,Name,Subject,C
0,Jean Paul Satre,Psychology,30
1,Friedrich Nietzsche,Philosophy,25
2,Salvador Dali,Surrealism,24


In [15]:
# Pivot df: one row per Name, one column per Subject, cells taken from C.
# Name/Subject combinations that do not occur in df show up as NaN.
pivot_table = pd.pivot_table(df, values='C', index='Name', columns='Subject')

# print the pivoted frame as plain text
print(pivot_table)


Subject              Philosophy  Psychology  Surrealism
Name                                                   
Friedrich Nietzsche        25.0         NaN         NaN
Jean Paul Satre             NaN        30.0         NaN
Salvador Dali               NaN         NaN        24.0


In [16]:
# `values` is given as a list of column names
df.pivot(
    index='Name',
    columns='Subject',
    values=['C', 'Name'],  # two value columns are spread at once
)

Unnamed: 0_level_0,C,C,C,Name,Name,Name
Subject,Philosophy,Psychology,Surrealism,Philosophy,Psychology,Surrealism
Name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Friedrich Nietzsche,25.0,,,Friedrich Nietzsche,,
Jean Paul Satre,,30.0,,,Jean Paul Satre,
Salvador Dali,,,24.0,,,Salvador Dali


In [17]:
# The pivot table below is created for the given dataset after the gender
# percentage has been calculated.


In [18]:
# pandas is required for the DataFrame built below
import pandas as pd

# six people, one row each, with their gender and age
df = pd.DataFrame(
    dict(
        Name=['Alice', 'Bob', 'Ella', 'Mike', 'Lily', 'Tom'],
        Gender=['Female', 'Male', 'Female', 'Male', 'Female', 'Male'],
        Age=[31, 19, 27, 42, 35, 24],
    )
)

# caption, the raw data, then a 40-character divider, each on its own line
print("Dataset", df, "-" * 40, sep="\n")

# label an age as "at most 25" or "above 25"
def age_bucket(age):
    """Return "<=25" when ``age`` is 25 or less, otherwise ">25"."""
    return "<=25" if age <= 25 else ">25"

# tag every row with its age bucket
df['Age Group'] = df['Age'].apply(age_bucket)

# share of each gender as a percentage of all rows, as a two-column frame
gender = pd.DataFrame(
    100 * df['Gender'].value_counts(normalize=True)
).reset_index()
gender.columns = ['Gender', '%Gender']

# attach the percentage to every row of the matching gender
df = df.merge(gender, how='inner', on=['Gender'])

# count names per (gender, gender percentage, age group) combination
table = df.pivot_table(
    index=['Gender', '%Gender', 'Age Group'],
    values=['Name'],
    aggfunc={'Name': 'count'},
)

# caption followed by the pivot table
print("Table", table, sep="\n")


Dataset
    Name  Gender  Age
0  Alice  Female   31
1    Bob    Male   19
2   Ella  Female   27
3   Mike    Male   42
4   Lily  Female   35
5    Tom    Male   24
----------------------------------------
Table
                          Name
Gender %Gender Age Group      
Female 50.0    >25           3
Male   50.0    <=25          2
               >25           1
