# Some Useful Methods in Pandas

In [1]:
import numpy as np
import pandas as pd

In [2]:
# A Series stores one value per index label; here the labels are "a" to "d".
s = pd.Series({"a": 1, "b": 2, "c": 3, "d": 4})
s

a    1
b    2
c    3
d    4
dtype: int64

In [3]:
# Look up a single value by its index label.
s["a"]

1

In [4]:
# reindex builds a new Series in the requested label order; "e" is not in s,
# so it appears as NaN and the result is float64.
s2=s.reindex(["b","d","a","c","e"])
s2

b    2.0
d    4.0
a    1.0
c    3.0
e    NaN
dtype: float64

In [9]:
# String values labelled with the non-consecutive integers 0, 2 and 4.
s3 = pd.Series({0: "blue", 2: "yellow", 4: "purple"})
s3

0      blue
2    yellow
4    purple
dtype: object

In [11]:
# 3x3 frame holding 0..8, with labelled rows and one column per person.
# numpy is imported in the notebook's top import cell rather than here.
df = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    index=["a", "c", "d"],
    columns=["Tim", "Tom", "Kate"],
)
df

Unnamed: 0,Tim,Tom,Kate
a,0,1,2
c,3,4,5
d,6,7,8


In [12]:
# Reorder the columns by label. reindex returns a new frame; df is unchanged.
names=["Kate","Tim","Tom"]
df.reindex(columns=names)

Unnamed: 0,Kate,Tim,Tom
a,2,0,1
c,5,3,4
d,8,6,7


In [14]:
df

Unnamed: 0,Tim,Tom,Kate
a,0,1,2
c,3,4,5
d,6,7,8


In [15]:
# .loc with a list of row labels returns those rows in the order listed.
df.loc[["c","d","a"]]

Unnamed: 0,Tim,Tom,Kate
c,3,4,5
d,6,7,8
a,0,1,2


In [16]:
# drop method: returns a new object without the given labels (the original is unchanged)

In [17]:
# Five floats 0.0-4.0 labelled "a" to "e".
s = pd.Series(np.arange(5, dtype="float64"), index=list("abcde"))
s

a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
dtype: float64

In [20]:
# drop returns a new Series without label "b"; s still has all five entries.
new_s=s.drop("b")
new_s

a    0.0
c    2.0
d    3.0
e    4.0
dtype: float64

In [21]:
# Pass a list to drop several labels at once.
s.drop(["c","d"])

a    0.0
b    1.0
e    4.0
dtype: float64

In [22]:
# 4x4 frame holding 0..15: one row per person, columns A to D.
data = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    index=["Kate", "Tim", "Tom", "Alex"],
    columns=["A", "B", "C", "D"],
)
data

Unnamed: 0,A,B,C,D
Kate,0,1,2,3
Tim,4,5,6,7
Tom,8,9,10,11
Alex,12,13,14,15


In [23]:
# With no axis given, drop removes rows by index label.
data.drop(["Kate","Tim"])

Unnamed: 0,A,B,C,D
Tom,8,9,10,11
Alex,12,13,14,15


In [24]:
data

Unnamed: 0,A,B,C,D
Kate,0,1,2,3
Tim,4,5,6,7
Tom,8,9,10,11
Alex,12,13,14,15


In [25]:
# axis=1 makes drop look for the label among the columns.
data.drop("A",axis=1)

Unnamed: 0,B,C,D
Kate,1,2,3
Tim,5,6,7
Tom,9,10,11
Alex,13,14,15


In [26]:
data

Unnamed: 0,A,B,C,D
Kate,0,1,2,3
Tim,4,5,6,7
Tom,8,9,10,11
Alex,12,13,14,15


In [27]:
# axis=0 (the default) drops a row by its index label.
data.drop("Kate",axis=0)

Unnamed: 0,A,B,C,D
Tim,4,5,6,7
Tom,8,9,10,11
Alex,12,13,14,15


In [30]:
# axis="index" averages down the rows, giving one mean per column.
data.mean(axis="index")

A    6.0
B    7.0
C    8.0
D    9.0
dtype: float64

In [33]:
# axis=0 is the numeric spelling of axis="index".
data.mean(axis=0)

A    6.0
B    7.0
C    8.0
D    9.0
dtype: float64

In [31]:
# axis="columns" averages across the columns, giving one mean per row.
data.mean(axis="columns")

Kate     1.5
Tim      5.5
Tom      9.5
Alex    13.5
dtype: float64

In [32]:
# axis=1 is the numeric spelling of axis="columns".
data.mean(axis=1)

Kate     1.5
Tim      5.5
Tom      9.5
Alex    13.5
dtype: float64

# Arithmetic Operations in Pandas

In [34]:
import pandas as pd 
import numpy as np

In [35]:
# Two integer Series whose labels only partly overlap ("a", "c" and "e" are shared).
s1 = pd.Series(np.arange(4), index=list("acde"))
s2 = pd.Series(np.arange(5), index=list("acefg"))

In [36]:
s1

a    0
c    1
d    2
e    3
dtype: int32

In [37]:
s2

a    0
c    1
e    2
f    3
g    4
dtype: int32

In [38]:
# Addition aligns on index labels: "d", "f" and "g" exist in only one Series,
# so their sums are NaN and the result is float64.
s1+s2

a    0.0
c    2.0
d    NaN
e    5.0
f    NaN
g    NaN
dtype: float64

In [46]:
# Why does s1 + s2 contain NaN?
# A label that exists in only one Series has no partner value, so pandas
# treats the missing side as NaN, and any arithmetic with NaN yields NaN.
result = 5 + np.nan

print(result)  # prints: nan


nan


In [58]:
# df1 and df2 share rows Tim/Tom and columns A/C. Row Kate and column D exist
# only in df2, while column B exists only in df1.
df1 = pd.DataFrame(
    np.arange(6).reshape(2, 3),
    index=["Tim", "Tom"],
    columns=["A", "B", "C"],
)
df2 = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    index=["Tim", "Kate", "Tom"],
    columns=["A", "C", "D"],
)

In [59]:
# print() emits the plain-text repr of each frame, one after the other.
print(df1)
print(df2)

     A  B  C
Tim  0  1  2
Tom  3  4  5
      A  C  D
Tim   0  1  2
Kate  3  4  5
Tom   6  7  8


In [60]:
# Only cells whose row and column labels exist in both frames get a sum;
# every other cell is NaN.
df1+df2

Unnamed: 0,A,B,C,D
Kate,,,,
Tim,0.0,,3.0,
Tom,9.0,,12.0,


In [56]:
# fill_value=0 stands in for a value that is missing from one of the frames.
# A cell missing from both (Kate, B) is still NaN.
df1.add(df2,fill_value=0)

Unnamed: 0,A,B,C,D
Kate,3.0,,4.0,5.0
Tim,0.0,1.0,3.0,2.0
Tom,9.0,4.0,12.0,8.0


In [64]:
df1

Unnamed: 0,A,B,C
Tim,0,1,2
Tom,3,4,5


In [63]:
# Element-wise reciprocal; the 0 at (Tim, A) gives inf.
1/df1

Unnamed: 0,A,B,C
Tim,inf,1.0,0.5
Tom,0.333333,0.25,0.2


In [65]:
# Multiplying by a scalar applies to every element.
df1*3

Unnamed: 0,A,B,C
Tim,0,3,6
Tom,9,12,15


In [66]:
# Method form of df1*3.
df1.mul(3)

Unnamed: 0,A,B,C
Tim,0,3,6
Tom,9,12,15


In [67]:
df2

Unnamed: 0,A,C,D
Tim,0,1,2
Kate,3,4,5
Tom,6,7,8


# Sorting & Ranking in Pandas

In [68]:
import pandas as pd
import numpy as np

In [69]:
# Values 0..4 attached to labels that are deliberately out of alphabetical order.
s = pd.Series({"e": 0, "d": 1, "a": 2, "b": 3, "c": 4})
s

e    0
d    1
a    2
b    3
c    4
dtype: int64

In [70]:
# Sort by index label (alphabetical here); each value stays with its label.
s.sort_index()

a    2
b    3
c    4
d    1
e    0
dtype: int64

In [71]:
# 3x4 frame holding 0..11 whose row and column labels are both unsorted.
df = pd.DataFrame(
    np.arange(12).reshape(3, 4),
    index=["two", "one", "three"],
    columns=list("dabc"),
)
df

Unnamed: 0,d,a,b,c
two,0,1,2,3
one,4,5,6,7
three,8,9,10,11


In [72]:
# Default axis: sort the rows by their index labels.
df.sort_index()

Unnamed: 0,d,a,b,c
one,4,5,6,7
three,8,9,10,11
two,0,1,2,3


In [73]:
# axis=1 sorts the columns by their labels instead.
df.sort_index(axis=1)

Unnamed: 0,a,b,c,d
two,1,2,3,0
one,5,6,7,4
three,9,10,11,8


In [74]:
# ascending=False reverses the column order.
df.sort_index(axis=1, ascending=False)

Unnamed: 0,d,c,b,a
two,0,3,2,1
one,4,7,6,5
three,8,11,10,9


In [75]:
# Float Series with one missing value, used to show where NaN lands when sorting.
s2 = pd.Series([5.0, np.nan, 3.0, -1.0, 9.0])
s2

0    5.0
1    NaN
2    3.0
3   -1.0
4    9.0
dtype: float64

In [76]:
# Ascending sort by value; the NaN entry is placed last.
s2.sort_values()

3   -1.0
2    3.0
0    5.0
4    9.0
1    NaN
dtype: float64

In [78]:
# Two integer columns, written row by row as (a, b) pairs.
df2 = pd.DataFrame(
    [[5, 1], [3, -2], [-1, 0], [9, 5]],
    columns=["a", "b"],
)
df2

Unnamed: 0,a,b
0,5,1
1,3,-2
2,-1,0
3,9,5


In [79]:
# Sort the rows by the values in column "b".
df2.sort_values(by="b")

Unnamed: 0,a,b
1,3,-2
2,-1,0
0,5,1
3,9,5


In [80]:
# Same sort, largest "b" first.
df2.sort_values(by="b",ascending=False)

Unnamed: 0,a,b
3,9,5
0,5,1
2,-1,0
1,3,-2


In [81]:
df2

Unnamed: 0,a,b
0,5,1
1,3,-2
2,-1,0
3,9,5


In [82]:
# Sort by "b", using "a" to break ties. "b" has no duplicates in this frame,
# so the result is the same as sorting by "b" alone.
df2.sort_values(by=["b","a"])

Unnamed: 0,a,b
1,3,-2
2,-1,0
0,5,1
3,9,5


In [83]:
# Build a small frame that contains a tie in column "b" (rows 1 and 2).
# The dict is passed inline so the notebook-level name `data` is not rebound
# from a DataFrame to a plain dict; pandas is already imported above.
df2 = pd.DataFrame({"a": [2, 1, 3, 1], "b": [1, 2, 2, 3]})

# Sort by "b" first; rows that tie on "b" are then ordered by "a".
# With this sample the rows already satisfy that order, so the printed result
# matches df2 row for row.
sorted_df = df2.sort_values(by=["b", "a"])

print(sorted_df)


   a  b
0  2  1
1  1  2
2  3  2
3  1  3


In [84]:
df2

Unnamed: 0,a,b
0,2,1
1,1,2
2,3,2
3,1,3


In [85]:
sorted_df

Unnamed: 0,a,b
0,2,1
1,1,2
2,3,2
3,1,3


1. Sort by column 'b': rows are ordered so that lower values in column 'b' come first.
2. Break ties with column 'a': rows that share the same value in column 'b' are ordered among themselves by column 'a'.

In this example the rows of `df2` were already in that order, so `sorted_df` is identical to `df2`.

# Practice

In [87]:
# Load the video game sales CSV; the relative path is resolved against the
# current working directory.
data=pd.read_csv("vgsalesGlobale.csv")

In [88]:
# Preview the first five rows.
data.head()

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37


In [89]:
# Descending sort of the Name strings. The comparison is case-sensitive, so
# names starting with lowercase or non-ASCII characters come before the rest.
data["Name"].sort_values(ascending=False)

9135                Â¡Shin Chan Flipa en colores!
470                   wwe Smackdown vs. Raw 2006
15523               uDraw Studio: Instant Artist
7835                uDraw Studio: Instant Artist
627                                 uDraw Studio
                          ...                   
8304               .hack//G.U. Vol.3//Redemption
8602     .hack//G.U. Vol.2//Reminisce (jp sales)
7107                .hack//G.U. Vol.2//Reminisce
8357                  .hack//G.U. Vol.1//Rebirth
4754                                 '98 Koshien
Name: Name, Length: 16598, dtype: object