### How to import pandas and check the version?

In [2]:
import pandas as pd
pd.__version__

'1.4.4'

### How to create a series from a list, numpy array and dict?


In [3]:
import numpy as np
a = [1,2,3,4]
b = np.arange(1,11)
c = {"a":"Hello","b":"Hi.."}

In [4]:
a_1 = pd.Series(a)
type(a_1)

pandas.core.series.Series

In [5]:
b_1 = pd.Series(b)
type(b_1)

pandas.core.series.Series

In [6]:
c_1 = pd.Series(c)
type(c_1)

pandas.core.series.Series

### How to convert the index of a series into a column of a dataframe?

In [7]:
 # Input
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))
ser = pd.Series(mydict)

# Solution
df = ser.to_frame().reset_index()
print(df.head())

  index  0
0     a  0
1     b  1
2     c  2
3     e  3
4     d  4


###  How to combine many series to form a dataframe?

In [8]:
# Input
import numpy as np
ser1 = pd.Series(list('abcef'))
ser2 = pd.Series(np.arange(26))

# Solution 1
df = pd.concat([ser1, ser2], axis=1)

# Solution 2
df = pd.DataFrame({'col1': ser1, 'col2': ser2})
print(df.head())

  col1  col2
0    a     0
1    b     1
2    c     2
3    e     3
4    f     4


### How to assign name to the series’ index?

In [9]:
# Input
ser = pd.Series(list('abcedf'))

# Solution
ser.name = 'alphabets'
ser.head()

0    a
1    b
2    c
3    e
4    d
Name: alphabets, dtype: object

### How to get the items of series A not present in series B?

In [17]:
# Input
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

# Solution
ser1[~ser1.isin(ser2)]

0    1
1    2
2    3
dtype: int64

### How to get the items not common to both series A and series B?

In [18]:
# Input
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

# Solution
ser_u = pd.Series(np.union1d(ser1, ser2))  # union
ser_i = pd.Series(np.intersect1d(ser1, ser2))  # intersect
ser_u[~ser_u.isin(ser_i)]

0    1
1    2
2    3
5    6
6    7
7    8
dtype: int64

### How to get the minimum, 25th percentile, median, 75th, and max of a numeric series?

In [19]:
# Input
state = np.random.RandomState(100)
ser = pd.Series(state.normal(10, 5, 25))

# Solution
np.percentile(ser, q=[0, 25, 50, 75, 100])

array([ 1.25117263,  7.70986507, 10.92259345, 13.36360403, 18.0949083 ])

### How to convert a numpy array to a dataframe of given shape?

In [20]:
# Input
r = pd.Series(np.random.randint(1, 10, 35))

# Solution
df = pd.DataFrame(r.values.reshape(7,5))
print(df)

   0  1  2  3  4
0  7  8  4  7  6
1  6  4  4  2  9
2  3  7  5  2  4
3  4  5  6  7  9
4  9  2  2  9  3
5  8  5  7  9  9
6  5  8  5  7  2


### How to find the positions of numbers that are multiples of 2 from a series?

In [1]:
# Input
r = pd.Series(np.random.randint(1, 10, 3))
r

# Solution
print(r)
np.argwhere(r % 2==0)

NameError: name 'pd' is not defined

In [23]:
# Input
ser = pd.Series(np.random.randint(1, 10, 7))
ser

# Solution
print(ser)
np.argwhere(ser % 3==0)

0    9
1    3
2    2
3    9
4    3
5    9
6    4
dtype: int32


ValueError: Length of passed values is 1, index implies 7.

### How to stack two series vertically and horizontally ?

In [25]:
# Input
r1 = pd.Series(range(5))
r2 = pd.Series(list('abcde'))

# Output
# Vertical
r1.append(r2)

# Horizontal
df = pd.concat([r1, r2], axis=1)
print(df)

   0  1
0  0  a
1  1  b
2  2  c
3  3  d
4  4  e


### How to convert the first character of each element in a series to uppercase?

In [28]:
# Input
r = pd.Series(['how', 'to', 'kick', 'ass?'])

# Solution 1
r.map(lambda x: x.title())

0     How
1      To
2    Kick
3    Ass?
dtype: object

In [29]:
r.map(lambda x: x[0].upper() + x[1:])

0     How
1      To
2    Kick
3    Ass?
dtype: object

### How to calculate the number of characters in each word in a series?

In [30]:
# Input
r = pd.Series(['how', 'to', 'kick', 'ass?'])

# Solution
r.map(lambda x: len(x))

0    3
1    2
2    4
3    4
dtype: int64

### How to compute difference of differences between consequtive numbers of a series?

In [31]:
# Input
r = pd.Series([1, 3, 6, 10, 15, 21, 27, 35])

# Solution
print(r.diff().tolist())


[nan, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0, 8.0]


### How to get the day of month, week number, day of year and day of week from a series of date strings?

In [34]:
# Input
ser = pd.Series(['01 Jan 2010', '02-02-2011', '20120303', '2013/04/04', '2014-05-05', '2015-06-06T12:20'])

# Solution
from dateutil.parser import parse
ser_ts = ser.map(lambda x: parse(x))

# day of month
print("Date: ", ser_ts.dt.day.tolist())

# week number
print("Week number: ", ser_ts.dt.weekofyear.tolist())

# day of year
print("Day number of year: ", ser_ts.dt.dayofyear.tolist())

# day of week
# print("Day of week: ", ser_ts.dt.weekday_name.tolist())


Date:  [1, 2, 3, 4, 5, 6]
Week number:  [53, 5, 9, 14, 19, 23]
Day number of year:  [1, 33, 63, 94, 125, 157]


  print("Week number: ", ser_ts.dt.weekofyear.tolist())


### How to filter words that contain atleast 2 vowels from a series?

In [35]:
# Input
ser = pd.Series(['Apple', 'Orange', 'Plan', 'Python', 'Money'])

# Solution
from collections import Counter
mask = ser.map(lambda x: sum([Counter(x.lower()).get(i, 0) for i in list('aeiou')]) >= 2)
ser[mask]

0     Apple
1    Orange
4     Money
dtype: object

## How to filter valid emails from a series?

In [36]:
# Input
emails = pd.Series(['buying books at amazom.com', 'rameses@egypt.com', 'matt@t.co', 'narendra@modi.com'])

# Solution 1 (as series of strings)
import re
pattern ='[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,4}'
mask = emails.map(lambda x: bool(re.match(pattern, x)))
emails[mask]

# Solution 2 (as series of list)
emails.str.findall(pattern, flags=re.IGNORECASE)

# Solution 3 (as list)
[x[0] for x in [re.findall(pattern, email) for email in emails] if len(x) > 0]

['rameses@egypt.com', 'matt@t.co', 'narendra@modi.com']

## How to get the mean of a series grouped by another series?

In [37]:
# Input
fruit = pd.Series(np.random.choice(['apple', 'banana', 'carrot'], 10))
weights = pd.Series(np.linspace(1, 10, 10))

# Solution
weights.groupby(fruit).mean()

apple     5.00
banana    8.25
carrot    3.40
dtype: float64

### How to replace missing spaces in a string with the least frequent character?

In [38]:
# Input
my_str = 'dbc deb abed gade'

# Solution
ser = pd.Series(list('dbc deb abed gade'))
freq = ser.value_counts()
print(freq)
least_freq = freq.dropna().index[-1]
"".join(ser.replace(' ', least_freq))

d    4
     3
b    3
e    3
a    2
c    1
g    1
dtype: int64


'dbcgdebgabedggade'

### How to create a TimeSeries starting ‘2000-01-01’ and 10 weekends (saturdays) after that having random numbers as values?

In [39]:
r = pd.Series(np.random.randint(1,10,10), pd.date_range('2000-01-01', periods=10, freq='W-SAT'))
r

2000-01-01    1
2000-01-08    2
2000-01-15    7
2000-01-22    5
2000-01-29    1
2000-02-05    2
2000-02-12    6
2000-02-19    3
2000-02-26    5
2000-03-04    9
Freq: W-SAT, dtype: int32

### How to compute the autocorrelations of a numeric series?

In [40]:
# Input
ser = pd.Series(np.arange(20) + np.random.normal(1, 10, 20))

# Solution
autocorrelations = [ser.autocorr(i).round(2) for i in range(11)]
print(autocorrelations[1:])
print('Lag having highest correlation: ', np.argmax(np.abs(autocorrelations[1:]))+1)

[0.32, 0.07, -0.09, 0.33, 0.51, -0.17, 0.15, 0.15, 0.52, 0.1]
Lag having highest correlation:  9


###  How to import only specified columns from a csv file?

In [41]:
df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv', usecols=['crim', 'medv'])
print(df.head())

      crim  medv
0  0.00632  24.0
1  0.02731  21.6
2  0.02729  34.7
3  0.03237  33.4
4  0.06905  36.2


### How to get the nrows, ncolumns, datatype, summary stats of each column of a dataframe? 
### Also get the array and list equivalent.

In [42]:
df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/Cars93_miss.csv')

#  number of rows and columns
print(df.shape)

# datatypes
print(df.dtypes)

# how many columns under each dtype
print(df.get_dtype_counts())
print(df.dtypes.value_counts())

# summary statistics
df_stats = df.describe()

# numpy array 
df_arr = df.values

# list
df_list = df.values.tolist()

(93, 27)
Manufacturer           object
Model                  object
Type                   object
Min.Price             float64
Price                 float64
Max.Price             float64
MPG.city              float64
MPG.highway           float64
AirBags                object
DriveTrain             object
Cylinders              object
EngineSize            float64
Horsepower            float64
RPM                   float64
Rev.per.mile          float64
Man.trans.avail        object
Fuel.tank.capacity    float64
Passengers            float64
Length                float64
Wheelbase             float64
Width                 float64
Turn.circle           float64
Rear.seat.room        float64
Luggage.room          float64
Weight                float64
Origin                 object
Make                   object
dtype: object


AttributeError: 'DataFrame' object has no attribute 'get_dtype_counts'

### How to rename a specific columns in a dataframe?

In [43]:
# Input
df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/Cars93_miss.csv')

# Solution
# Step 1:
df=df.rename(columns = {'Type':'CarType'})
# or
df.columns.values[2] = "CarType"

# Step 2:
df.columns = df.columns.map(lambda x: x.replace('.', '_'))
print(df.columns)

Index(['Manufacturer', 'Model', 'CarType', 'Min_Price', 'Price', 'Max_Price',
       'MPG_city', 'MPG_highway', 'AirBags', 'DriveTrain', 'Cylinders',
       'EngineSize', 'Horsepower', 'RPM', 'Rev_per_mile', 'Man_trans_avail',
       'Fuel_tank_capacity', 'Passengers', 'Length', 'Wheelbase', 'Width',
       'Turn_circle', 'Rear_seat_room', 'Luggage_room', 'Weight', 'Origin',
       'Make'],
      dtype='object')


### How to check if a dataframe has any missing values?

In [44]:
# Input
df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/Cars93_miss.csv')

# Solution
df.isnull().values.any()

True

### How to replace missing values of multiple numeric columns with the mean?

In [45]:
# Input
df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/Cars93_miss.csv')

# Solution
df_out = df[['Min.Price', 'Max.Price']] = df[['Min.Price', 'Max.Price']].apply(lambda x: x.fillna(x.mean()))
print(df_out.head())

   Min.Price  Max.Price
0  12.900000  18.800000
1  29.200000  38.700000
2  25.900000  32.300000
3  17.118605  44.600000
4  17.118605  21.459091


### How to select a specific column from a dataframe as a dataframe instead of a series?

In [46]:
# Input
df = pd.DataFrame(np.arange(20).reshape(-1, 5), columns=list('abcde'))

# Solution
type(df[['a']])
type(df.loc[:, ['a']])
type(df.iloc[:, [0]])

pandas.core.frame.DataFrame

### How to format or suppress scientific notations in a pandas dataframe?

In [47]:
# Input
df = pd.DataFrame(np.random.random(4)**10, columns=['random'])

# Solution 1: Rounding
df.round(4)

# Solution 2: Use apply to change format
df.apply(lambda x: '%.4f' % x, axis=1)
# or
df.applymap(lambda x: '%.4f' % x)

Unnamed: 0,random
0,0.0011
1,0.0
2,0.0142
3,0.72


### How to format all the values in a dataframe as percentages?

In [48]:
# Input
df = pd.DataFrame(np.random.random(4), columns=['random'])

# Solution
out = df.style.format({
    'random': '{0:.2%}'.format,
})

In [49]:
out

Unnamed: 0,random
0,74.36%
1,66.66%
2,50.39%
3,62.07%


### How to reverse the rows of a dataframe?

In [50]:
# Input
df = pd.DataFrame(np.arange(25).reshape(5, -1))

# Solution 1
df.iloc[::-1, :]

Unnamed: 0,0,1,2,3,4
4,20,21,22,23,24
3,15,16,17,18,19
2,10,11,12,13,14
1,5,6,7,8,9
0,0,1,2,3,4


### How to split a text column into two separate columns?

In [52]:
# Input
df = pd.DataFrame(["STD, City    State",
"33, Kolkata    West Bengal",
"44, Chennai    Tamil Nadu",
"40, Hyderabad    Telengana",
"80, Bangalore    Karnataka"], columns=['row'])

# Solution
df_out = df.row.str.split(',|\t', expand=True)

# Make first row as header
new_header = df_out.iloc[0]
df_out = df_out[1:]
df_out.columns = new_header
print(df_out)

0 STD            City    State
1  33   Kolkata    West Bengal
2  44    Chennai    Tamil Nadu
3  40   Hyderabad    Telengana
4  80   Bangalore    Karnataka
