## Get the pandas version?

In [2]:
import numpy as np  # optional
import pandas as pd

# print(pd.__version__)
# print(pd.show_versions(as_json=True))

## create a series from a list, numpy array and dict?

In [19]:
# from list
mylist = list('ABCDE')

In [26]:
# Solution
pd.Series(mylist)

0    A
1    B
2    C
3    D
4    E
dtype: object

In [21]:
# from arr
myarr = np.arange(5)

In [22]:
# Solution
pd.Series(myarr)

0    0
1    1
2    2
3    3
4    4
dtype: int32

In [24]:
# from dict
mydict = dict(zip(myarr, mylist))
mydict

{0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E'}

In [25]:
# Solution
pd.Series(mydict)

0    A
1    B
2    C
3    D
4    E
dtype: object

## convert the index of a series into a column of a dataframe?

Convert the series $ser$ into a dataframe with its index as another column on the dataframe.

In [31]:
# from list
mylist1 = list('ABCDE')
mylist2 = list('abcde')

In [32]:
mydict = dict(zip(mylist1, mylist2))

In [34]:
ser = pd.Series(mydict)
ser

A    a
B    b
C    c
D    d
E    e
dtype: object

In [46]:
# solution
print(pd.DataFrame(ser, columns = ['value']))
print(pd.DataFrame(ser, columns = ['value']).reset_index())

  value
A     a
B     b
C     c
D     d
E     e
  index value
0     A     a
1     B     b
2     C     c
3     D     d
4     E     e


## How to combine many series to form a dataframe?

In [47]:
ser1 = pd.Series(list('abcdef'))
ser2 = pd.Series(np.arange(6))

In [48]:
# solution1 - using pd.DataFrame
pd.DataFrame({'col1': ser1, 'col2': ser2})

Unnamed: 0,col1,col2
0,a,0
1,b,1
2,c,2
3,d,3
4,e,4
5,f,5


In [50]:
# solution2 - using pd.concat
pd.concat([ser1, ser2], axis=1)

Unnamed: 0,0,1
0,a,0
1,b,1
2,c,2
3,d,3
4,e,4
5,f,5


## assign name to the series’ index

In [51]:
pd.Series([1, 2, 3, 4, 5], name = 'Price')

0    1
1    2
2    3
3    4
4    5
Name: Price, dtype: int64

## get the items of series A not present in series B?

In [52]:
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

In [55]:
ser1[~ser1.isin(ser2)]

0    1
1    2
2    3
dtype: int64

## get the minimum, 25th percentile, median, 75th, and max of a numeric series?

In [56]:
ser = pd.Series(np.random.normal(10, 5, 25))

In [57]:
np.percentile(ser, q=[0, 25, 50, 75, 100])

array([ 1.87355022,  8.34528211, 10.03686855, 13.50454725, 21.46892701])

## get frequency counts of unique items of a series?

In [60]:
mylist = list('abcdefgh')

ser = pd.Series(np.take(mylist, np.random.randint(len(mylist), size=30)))
ser

0     d
1     e
2     e
3     c
4     c
5     e
6     h
7     a
8     c
9     h
10    g
11    d
12    e
13    a
14    c
15    h
16    f
17    b
18    a
19    f
20    b
21    f
22    d
23    a
24    b
25    d
26    d
27    f
28    d
29    e
dtype: object

In [61]:
# Solution
ser.value_counts()

d    6
e    5
a    4
f    4
c    4
b    3
h    3
g    1
dtype: int64

## keep only top 2 most frequent values as it is and replace everything else as ‘Other’?

In [68]:
mylist = list('abcdefgh')

ser = pd.Series(np.take(mylist, np.random.randint(len(mylist), size=30)))

In [71]:
ser.value_counts()

b    8
c    5
h    4
f    4
a    3
e    3
g    3
dtype: int64

In [73]:
# solution
print("Top 2 Freq:\n", ser.value_counts())

Top 2 Freq:
 b    8
c    5
h    4
f    4
a    3
e    3
g    3
dtype: int64


####  using pandas datetime with custom holidays and calculating date differences

In [2]:
import pandas as pd

In [3]:
start_date = pd.to_datetime('2022-07-01')
end_date   = pd.to_datetime('2022-07-31')

custom_holidays = [pd.to_datetime('2022-07-04'), 
                   pd.to_datetime('2023-07-24')]

In [4]:
all_dates = pd.date_range(start=start_date, end=end_date)

In [5]:
def is_holiday_or_weekend(date):
    return date in custom_holidays or date.weekday() >= 5

In [6]:
working_days = [date for date in all_dates if not is_holiday_or_weekend(date)]

In [7]:
len(working_days)

20

In [8]:
!pip install pandas_market_calendars

Collecting pandas_market_calendars
  Downloading pandas_market_calendars-4.1.4-py3-none-any.whl (98 kB)
                                              0.0/98.9 kB ? eta -:--:--
     ----------------------------             71.7/98.9 kB 2.0 MB/s eta 0:00:01
     ---------------------------------------- 98.9/98.9 kB 2.9 MB/s eta 0:00:00
Collecting exchange-calendars>=3.3 (from pandas_market_calendars)
  Downloading exchange_calendars-4.2.8-py3-none-any.whl (191 kB)
                                              0.0/191.4 kB ? eta -:--:--
     -------------------------------------- 191.4/191.4 kB 5.8 MB/s eta 0:00:00
Collecting pyluach (from exchange-calendars>=3.3->pandas_market_calendars)
  Downloading pyluach-2.2.0-py3-none-any.whl (25 kB)
Collecting korean-lunar-calendar (from exchange-calendars>=3.3->pandas_market_calendars)
  Downloading korean_lunar_calendar-0.3.1-py3-none-any.whl (9.0 kB)
Installing collected packages: korean-lunar-calendar, pyluach, exchange-calendars, pandas_marke

In [10]:
import pandas_market_calendars as mcal

In [11]:
# Show available calendars
print(mcal.get_calendar_names())

['ASX', 'BMF', 'B3', 'CFE', 'CBOE_Futures', 'CBOE_Equity_Options', 'CBOE_Index_Options', 'CME_Equity', 'CBOT_Equity', 'CME_Agriculture', 'CBOT_Agriculture', 'COMEX_Agriculture', 'NYMEX_Agriculture', 'CME_Rate', 'CBOT_Rate', 'CME_InterestRate', 'CBOT_InterestRate', 'CME_Bond', 'CBOT_Bond', 'CMEGlobex_Livestock', 'CMEGlobex_Live_Cattle', 'CMEGlobex_Feeder_Cattle', 'CMEGlobex_Lean_Hog', 'CMEGlobex_Port_Cutout', 'CMEGlobex_FX', 'CME_FX', 'CME_Currency', 'CMEGlobex_EnergyAndMetals', 'CMEGlobex_Energy', 'CMEGlobex_CrudeAndRefined', 'CMEGlobex_NYHarbor', 'CMEGlobex_HO', 'HO', 'CMEGlobex_Crude', 'CMEGlobex_CL', 'CL', 'CMEGlobex_Gas', 'CMEGlobex_RB', 'RB', 'CMEGlobex_MicroCrude', 'CMEGlobex_MCL', 'MCL', 'CMEGlobex_NatGas', 'CMEGlobex_NG', 'NG', 'CMEGlobex_Dutch_NatGas', 'CMEGlobex_TTF', 'TTF', 'CMEGlobex_LastDay_NatGas', 'CMEGlobex_NN', 'NN', 'CMEGlobex_CarbonOffset', 'CMEGlobex_CGO', 'CGO', 'C-GEO', 'CMEGlobex_NGO', 'NGO', 'CMEGlobex_GEO', 'GEO', 'CMEGlobex_Metals', 'CMEGlobex_PreciousMetals',

In [13]:
# Create a calendar
nyse = mcal.get_calendar('NYSE')

In [16]:
nyse.schedule(start_date='2012-07-01', end_date='2012-07-31')

Unnamed: 0,market_open,market_close
2012-07-02,2012-07-02 13:30:00+00:00,2012-07-02 20:00:00+00:00
2012-07-03,2012-07-03 13:30:00+00:00,2012-07-03 17:00:00+00:00
2012-07-05,2012-07-05 13:30:00+00:00,2012-07-05 20:00:00+00:00
2012-07-06,2012-07-06 13:30:00+00:00,2012-07-06 20:00:00+00:00
2012-07-09,2012-07-09 13:30:00+00:00,2012-07-09 20:00:00+00:00
2012-07-10,2012-07-10 13:30:00+00:00,2012-07-10 20:00:00+00:00
2012-07-11,2012-07-11 13:30:00+00:00,2012-07-11 20:00:00+00:00
2012-07-12,2012-07-12 13:30:00+00:00,2012-07-12 20:00:00+00:00
2012-07-13,2012-07-13 13:30:00+00:00,2012-07-13 20:00:00+00:00
2012-07-16,2012-07-16 13:30:00+00:00,2012-07-16 20:00:00+00:00


In [17]:
from pandas_market_calendars import get_calendar

In [18]:
def is_custom_holiday(date):
    return date in custom_holidays

In [19]:
def calculate_working_days(start_date, end_date, custom_holidays):
    # Get the NYSE calendar (you can use other calendars depending on your location).
    nyse_calendar = get_calendar('XNYS')
    
    # Get all valid trading days between the start and end dates.
    trading_days = nyse_calendar.valid_days(start_date=start_date, end_date=end_date)
    
    # Filter out the custom holidays and weekends.
    working_days = [date for date in trading_days if not (date.weekday() >= 5 or is_custom_holiday(date))]
    
    return working_days

In [20]:
start_date = pd.to_datetime('2023-07-01')
end_date = pd.to_datetime('2023-07-31')
custom_holidays = [pd.to_datetime('2023-07-04'), pd.to_datetime('2023-07-24')]

working_days = calculate_working_days(start_date, end_date, custom_holidays)
num_working_days = len(working_days)

print("Number of working days:", num_working_days)

Number of working days: 20
