In [1]:
import pandas as pd
import numpy as np

In [2]:
# Sample sales records: ten consecutive daily dates, one flavor per date.
# Each keyword becomes a column name; every value holds exactly 10 entries.
data = dict(
    Date=pd.date_range('2023-08-01', periods=10, freq='D'),
    Flavor=[
        'Vanilla', 'Chocolate', 'Strawberry', 'Mint', 'Caramel',
        'Cookies n Cream', 'Rocky Road', 'Mango', 'Pistachio', 'Blueberry',
    ],
    Units_Sold=[100, 120, 90, 105, 110, 85, 95, 115, 125, 105],
    Profit=[200, 240, 180, 210, 220, 170, 190, 230, 250, 210],
)

In [3]:
# Build the base DataFrame from the column dict; it gets the default
# 0..n-1 integer index because no index is supplied.
bruams_df = pd.DataFrame(data=data)
bruams_df

Unnamed: 0,Date,Flavor,Units_Sold,Profit
0,2023-08-01,Vanilla,100,200
1,2023-08-02,Chocolate,120,240
2,2023-08-03,Strawberry,90,180
3,2023-08-04,Mint,105,210
4,2023-08-05,Caramel,110,220
5,2023-08-06,Cookies n Cream,85,170
6,2023-08-07,Rocky Road,95,190
7,2023-08-08,Mango,115,230
8,2023-08-09,Pistachio,125,250
9,2023-08-10,Blueberry,105,210


### Basic Index Resetting:
#### After performing some filtering and manipulation on your DataFrame, the index has become non-sequential. How would you reset the index of the DataFrame to a default integer index?

In [4]:
# Promote the 'Date' column to the index so there is a non-default index
# to reset in the next step. set_index returns a new frame; bruams_df is unchanged.
df_new = bruams_df.set_index(keys='Date')
df_new

Unnamed: 0_level_0,Flavor,Units_Sold,Profit
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-08-01,Vanilla,100,200
2023-08-02,Chocolate,120,240
2023-08-03,Strawberry,90,180
2023-08-04,Mint,105,210
2023-08-05,Caramel,110,220
2023-08-06,Cookies n Cream,85,170
2023-08-07,Rocky Road,95,190
2023-08-08,Mango,115,230
2023-08-09,Pistachio,125,250
2023-08-10,Blueberry,105,210


In [5]:
# With no arguments, reset_index() moves the current index ('Date') back
# into a regular column and installs the default 0..n-1 integer index.
# It returns a new DataFrame; df_new itself is not modified.
df_new.reset_index()

Unnamed: 0,Date,Flavor,Units_Sold,Profit
0,2023-08-01,Vanilla,100,200
1,2023-08-02,Chocolate,120,240
2,2023-08-03,Strawberry,90,180
3,2023-08-04,Mint,105,210
4,2023-08-05,Caramel,110,220
5,2023-08-06,Cookies n Cream,85,170
6,2023-08-07,Rocky Road,95,190
7,2023-08-08,Mango,115,230
8,2023-08-09,Pistachio,125,250
9,2023-08-10,Blueberry,105,210


### Dropping the Old Index:
#### You have reset the index of a DataFrame, and it added a new column with the old index values. 
#### How would you reset the index without adding a new column for the old index?

In [6]:
# Start again from a frame indexed by 'Date' (built fresh from bruams_df).
df_new = bruams_df.set_index(keys='Date')
df_new

Unnamed: 0_level_0,Flavor,Units_Sold,Profit
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-08-01,Vanilla,100,200
2023-08-02,Chocolate,120,240
2023-08-03,Strawberry,90,180
2023-08-04,Mint,105,210
2023-08-05,Caramel,110,220
2023-08-06,Cookies n Cream,85,170
2023-08-07,Rocky Road,95,190
2023-08-08,Mango,115,230
2023-08-09,Pistachio,125,250
2023-08-10,Blueberry,105,210


In [7]:
# drop=True discards the old index ('Date') instead of re-inserting it as a
# column, so only the default integer index and the data columns remain.
df_new.reset_index(drop=True)

Unnamed: 0,Flavor,Units_Sold,Profit
0,Vanilla,100,200
1,Chocolate,120,240
2,Strawberry,90,180
3,Mint,105,210
4,Caramel,110,220
5,Cookies n Cream,85,170
6,Rocky Road,95,190
7,Mango,115,230
8,Pistachio,125,250
9,Blueberry,105,210


### Naming the Index Column:
#### When resetting the index, how would you give the column that holds the old index values the name "Old_Index"?

In [8]:
# Use the 'Flavor' column as the index; this is the index that gets
# renamed when it is reset below.
df_new = bruams_df.set_index(keys='Flavor')
df_new

Unnamed: 0_level_0,Date,Units_Sold,Profit
Flavor,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Vanilla,2023-08-01,100,200
Chocolate,2023-08-02,120,240
Strawberry,2023-08-03,90,180
Mint,2023-08-04,105,210
Caramel,2023-08-05,110,220
Cookies n Cream,2023-08-06,85,170
Rocky Road,2023-08-07,95,190
Mango,2023-08-08,115,230
Pistachio,2023-08-09,125,250
Blueberry,2023-08-10,105,210


In [9]:
# reset_index(names=...) would do this in one call, but that parameter only
# exists in pandas >= 1.5 and this notebook runs on 1.4.x.
# Version-independent approach: rename the index first, then reset it, so the
# old index comes back as a column called "Old_Index".
df_new.rename_axis('Old_Index').reset_index()
# pandas >= 1.5 equivalent: df_new.reset_index(names='Old_Index')

In [10]:
# Show the installed pandas version; reset_index's `names` parameter
# requires 1.5 or newer.
pd.__version__

'1.4.2'

### Level-based Reset:
#### Suppose you have a multi-level index DataFrame. How would you reset only the second level of the index while keeping the first level intact?

In [11]:
# First, build a sample DataFrame with a two-level MultiIndex.

In [12]:
# Two parallel label lists: the outer labels each appear twice in a row,
# and the inner labels alternate "one"/"two".
arrays = [
    [outer for outer in ("bar", "baz", "foo", "qux") for _ in range(2)],
    ["one", "two"] * 4,
]

In [13]:
# Pair the labels position by position: one (outer, inner) tuple per row.
tuples = [*zip(*arrays)]

In [14]:
# Inspect the label pairs built from `arrays`.
tuples

[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]

In [15]:
# Turn the label pairs into a two-level index and name the levels so they
# can be referred to by name later.
mul_index = pd.MultiIndex.from_tuples(
    tuples,
    names=['first', 'second'],
)

In [16]:
# Display the MultiIndex to check its label pairs and level names.
mul_index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [17]:
# 8x4 frame of standard-normal values on the two-level index.
# A seeded local generator is used so the values are identical on every
# Restart & Run All; unseeded np.random.randn changes on each run.
df = pd.DataFrame(
    np.random.default_rng(42).standard_normal((8, 4)),
    index=mul_index,
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1,2,3
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
bar,one,1.918779,-1.153459,-0.544714,0.170169
bar,two,-0.182991,-1.794435,-0.117361,1.337816
baz,one,0.533582,0.246996,-1.534656,-1.951308
baz,two,-0.025744,-1.284852,-0.71412,0.160692
foo,one,2.308103,-2.742095,1.03499,0.388048
foo,two,-0.604341,-1.1566,0.512595,-0.279715
qux,one,-0.421428,-0.122191,-1.267823,-1.1474
qux,two,-0.829583,-0.127548,-1.503053,1.075078


In [18]:
# Confirm the DataFrame carries the two-level ('first', 'second') index.
df.index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [19]:
# Reset only the inner level (position 1, named 'second'): it becomes a
# regular column while 'first' stays behind as the index.
df.reset_index(level='second')

Unnamed: 0_level_0,second,0,1,2,3
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
bar,one,1.918779,-1.153459,-0.544714,0.170169
bar,two,-0.182991,-1.794435,-0.117361,1.337816
baz,one,0.533582,0.246996,-1.534656,-1.951308
baz,two,-0.025744,-1.284852,-0.71412,0.160692
foo,one,2.308103,-2.742095,1.03499,0.388048
foo,two,-0.604341,-1.1566,0.512595,-0.279715
qux,one,-0.421428,-0.122191,-1.267823,-1.1474
qux,two,-0.829583,-0.127548,-1.503053,1.075078


In [20]:
# After resetting only the inner level, the remaining index should be the
# single 'first' level -- i.e. the first level is still intact.
df.reset_index(level='second').index

Index(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], dtype='object', name='first')

### Reset Index with Different Sorting:
#### After resetting the index of a DataFrame, how can you ensure the new index is in descending order?

In [21]:
# 4x2 frame with a deliberately shuffled integer index (2, 0, 1, 3).
# A seeded local generator keeps the values identical on every
# Restart & Run All; unseeded np.random.randn changes on each run.
df = pd.DataFrame(
    np.random.default_rng(7).standard_normal((4, 2)) * 10,
    index=[2, 0, 1, 3],
)
df

Unnamed: 0,0,1
2,-10.861906,18.074486
0,-13.072029,4.813149
1,6.050194,-2.688901
3,-5.975242,15.037815


In [22]:
# drop=True throws away the shuffled labels (2, 0, 1, 3) and installs the
# default 0..n-1 integer index; row order is unchanged.
df.reset_index(drop=True)

Unnamed: 0,0,1
0,-10.861906,18.074486
1,-13.072029,4.813149
2,6.050194,-2.688901
3,-5.975242,15.037815


In [23]:
# Reset to the default integer index first, then order the rows by that
# new index from highest to lowest.
(
    df
    .reset_index(drop=True)
    .sort_index(ascending=False)
)

Unnamed: 0,0,1
3,-5.975242,15.037815
2,6.050194,-2.688901
1,-13.072029,4.813149
0,-10.861906,18.074486


### Maintaining the DataFrame:
#### By default, the reset_index method returns a new DataFrame. How would you modify the original DataFrame in place without having to assign the result to the same or a new variable?

In [24]:
# Fresh 4x2 frame with a shuffled integer index (2, 0, 1, 3) for the
# in-place demo. A seeded local generator keeps the values identical on
# every Restart & Run All; unseeded np.random.randn changes on each run.
df = pd.DataFrame(
    np.random.default_rng(11).standard_normal((4, 2)) * 10,
    index=[2, 0, 1, 3],
)
df

Unnamed: 0,0,1
2,13.794859,14.514509
0,3.772569,-16.479619
1,0.088611,-4.049489
3,-7.108226,-15.089173


In [25]:
# inplace=True modifies df itself and returns None, so the result is not
# assigned to anything; drop=True discards the old index labels.
# Note: re-running this cell is harmless, but df no longer has its original
# index once the cell has executed.
df.reset_index(drop=True, inplace=True)

In [26]:
# Display df to verify that the in-place reset changed the original object.
df

Unnamed: 0,0,1
0,13.794859,14.514509
1,3.772569,-16.479619
2,0.088611,-4.049489
3,-7.108226,-15.089173


### Combining with other Operations:
#### You've reset the index of your DataFrame and want to subsequently drop one of the columns named "Unwanted_Column".
#### How can you achieve this in a single line?

In [27]:
# 4x2 frame with a shuffled integer index (0, 2, 3, 1).
# A seeded local generator keeps the values identical on every
# Restart & Run All; unseeded np.random.randn changes on each run.
df = pd.DataFrame(
    np.random.default_rng(23).standard_normal((4, 2)),
    index=[0, 2, 3, 1],
)

In [28]:
# Display the freshly built frame with its shuffled index.
df

Unnamed: 0,0,1
0,-0.623829,-0.519375
2,-2.026363,-0.323603
3,0.945585,-0.016887
1,0.221734,0.028181


In [29]:
# Attach the column that the next step removes; the four values are
# assigned to the rows in their current order.
df['Unwanted_Column'] = [90, 80, 78, 90]
df

Unnamed: 0,0,1,Unwanted_Column
0,-0.623829,-0.519375,90
2,-2.026363,-0.323603,80
3,0.945585,-0.016887,78
1,0.221734,0.028181,90


In [30]:
# One chained expression: rebuild the default integer index (discarding the
# old labels), then remove the unwanted column from the result.
# Note: df is rebound here, so running this cell a second time raises a
# KeyError because the column is already gone.
df = df.reset_index(drop=True).drop('Unwanted_Column', axis='columns')

In [31]:
# Display the result: default integer index, 'Unwanted_Column' removed.
df

Unnamed: 0,0,1
0,-0.623829,-0.519375
1,-2.026363,-0.323603
2,0.945585,-0.016887
3,0.221734,0.028181
