In [167]:
import pandas as pd
import json
import os

# Set the directory containing JSON files
folder_path = "./"  # <-- change this to your actual folder path

# Initialize list to collect all rows
all_data = []

# Loop over each file in the folder
for filename in os.listdir(folder_path):
    if filename.endswith(".json"):
        year = filename.replace(".json", "")  # Extract year from filename
        file_path = os.path.join(folder_path, filename)
        
        with open(file_path, "r", encoding="utf-8") as f:
            year_data = json.load(f)

        # Flatten each month's data
        for month, days in year_data.items():
            for day in days:
                entry = day.copy()
                entry["month"] = month
                entry["year"] = int(year)
                all_data.append(entry)

# Create DataFrame
df = pd.DataFrame(all_data)

# Preview
print(df.head())


   np  en     tithi event  day  specialday  holiday     month  year
0  31  13   अष्टमी         sun       False    False  Baishakh  2069
1                          mon       False    False  Baishakh  2069
2                          tue       False    False  Baishakh  2069
3                          wed       False    False  Baishakh  2069
4                          thu       False    False  Baishakh  2069


In [168]:
import numpy as np
df = df.replace('', np.nan)

In [169]:
string_cols = df.select_dtypes(include='O').columns
df[string_cols] = df[string_cols].apply(lambda x: x.str.strip())


In [170]:
df[df['tithi'] == 'अष्टमी']

Unnamed: 0,np,en,tithi,event,day,specialday,holiday,month,year
0,31,13,अष्टमी,,sun,False,False,Baishakh,2069
6,2,14,अष्टमी,,sat,False,True,Baishakh,2069
21,17,29,अष्टमी,,sun,False,False,Baishakh,2069
51,16,29,अष्टमी,,tue,False,False,Jestha,2069
65,30,12,अष्टमी,,tue,False,False,Jestha,2069
...,...,...,...,...,...,...,...,...,...
2235,31,15,अष्टमी,हिले जात्रा,tue,False,False,Shrawan,2074
2256,13,29,अष्टमी,गौरा पर्ब,tue,False,True,Bhadra,2074
2271,28,13,अष्टमी,जितिया पर्व,wed,False,False,Bhadra,2074
2286,12,28,अष्टमी,महाअष्टमी,thu,False,True,Ashwin,2074


In [171]:
df.dropna(inplace=True)

In [172]:
df.head()

Unnamed: 0,np,en,tithi,event,day,specialday,holiday,month,year
0,31,13,अष्टमी,,sun,False,False,Baishakh,2069
5,1,13,सप्तमी,नयाँ वर्ष,fri,False,True,Baishakh,2069
6,2,14,अष्टमी,,sat,False,True,Baishakh,2069
7,3,15,नवमी,,sun,False,False,Baishakh,2069
8,4,16,दशमी,,mon,False,False,Baishakh,2069


In [173]:
df.tithi.unique()

array(['अष्टमी', 'सप्तमी', 'नवमी', 'दशमी', 'एकादशी', 'द्वादशी',
       'त्रायोदशी', 'चतुर्दशि', 'औसी', 'प्रतिपदा', 'दृतीया', 'तृतीया',
       'चतुर्थी', 'पञ्चमी', 'षष्ठी', 'पुर्णिमा', '',
       'पद्\u200cमिनी एकादशी', 'परमा एकादशी', 'हरिपरिवर्तिनी एकादशी',
       'इन्दिरा एकादशी', 'पापाङ्\u200dकुशा एकादशी', 'रमा एकादशी',
       'उत्पत्तिका एकादशी', 'मोक्षदा एकादशी', 'सफला द्वादशी',
       'अष्टमी / नवमी', 'प्रतिपदा / दृतीया', 'औसी / प्रतिपदा',
       'षष्ठी / सप्तमी', 'चतुर्दशि औसी', 'à¤¦à¥ƒà¤¤à¥€à¤¯à¤¾',
       'चतुर्थी पञ्चमी', 'त्रायोदशी चतुर्दशि', 'चतुर्दशी',
       'सप्तमी / अष्टमी', 'त्रयोदशी', 'द्वादशी / त्रयोदशी'], dtype=object)

In [174]:
df.event = df.event.map({'नयाँ वर्ष': 'nepali new year', 
  'हरितालिका (तीज)': 'teej',
       'घटस्थापना': 'dashain', 'फूल्पाती':'dashain', 'महाअष्टमी': 'dashain', 'महानवमी':'dashain', 'बिजया दशमी':'dashain',
 'लक्ष्मीपूजा':'tihar',  'भाइटीका': 'tihar',
       'छठपर्व': 'chhat', 'क्रिसमस':'christmas',
       'महा शिवरात्री': 'shivaratri', 'तराईमा होली': 'holi', 
       'फूलपाती':'dashain', 'महानबमी ब्रतम्':'dashain', 
       'लक्ष्मी पूजा':'tihar', 'गाई तिहार':'tihar', 'भाइ टीका':'tihar',
'Christmas Day':'cristmas', 'New Year': 'english new year'
,
       'माघे संक्रान्ति': 'maghe sakranti', 
       'महाशिवरात्री': 'shivaratri',  'होलि पुर्णिमा':'holi', 
    'हरितालिका तीज': 'teej', 
       'छठ पर्व': 'chhat', 'New Year 2015': 'english new year', 'सरस्वती पुजा': 'saraswati puja', 'होलि': 'holi',
       'घटस्थापना साइत: बिहान ११:२५': 'dashain',
       'क्रीसमस': 'christmas', 'सन् २०१६ प्रारम्भ': 'english new year',
       'माघे संक्रान्ति | माधि पर्व': 'maghe sakranti',
       'फागुपूर्णिमा \\ होली': 'holi',
       'विजयादशमी': 'dashain', 
       'गोबर्धन पूजा': 'tihar', 'भाई तिका': 'tihar', 
})

### Choose only 2071 and 2072

In [175]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2011 entries, 0 to 2305
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   np          2011 non-null   object
 1   en          2011 non-null   object
 2   tithi       2011 non-null   object
 3   event       81 non-null     object
 4   day         2011 non-null   object
 5   specialday  2011 non-null   bool  
 6   holiday     2011 non-null   bool  
 7   month       2011 non-null   object
 8   year        2011 non-null   int64 
dtypes: bool(2), int64(1), object(6)
memory usage: 129.6+ KB


In [176]:
df = df[df.year.isin([2071, 2072])]

In [177]:
df.year.unique()

array([2071, 2072], dtype=int64)

In [178]:
df.shape

(728, 9)

In [180]:
df.head()

Unnamed: 0,np,en,tithi,event,day,specialday,holiday,month,year
841,1,14,चतुर्दशी,nepali new year,mon,False,True,Baishakh,2071
842,2,15,पुर्णिमा,,tue,False,False,Baishakh,2071
843,3,16,प्रतिपदा,,wed,False,False,Baishakh,2071
844,4,17,दृतीया,,thu,False,False,Baishakh,2071
845,5,18,तृतीया,,fri,False,False,Baishakh,2071


In [181]:
df.isna().sum()

np              0
en              0
tithi           0
event         695
day             0
specialday      0
holiday         0
month           0
year            0
dtype: int64

In [182]:
df.fillna('no event', inplace=True)

In [183]:
df.head()

Unnamed: 0,np,en,tithi,event,day,specialday,holiday,month,year
841,1,14,चतुर्दशी,nepali new year,mon,False,True,Baishakh,2071
842,2,15,पुर्णिमा,no event,tue,False,False,Baishakh,2071
843,3,16,प्रतिपदा,no event,wed,False,False,Baishakh,2071
844,4,17,दृतीया,no event,thu,False,False,Baishakh,2071
845,5,18,तृतीया,no event,fri,False,False,Baishakh,2071


In [197]:
df.month = df.month.map({
    'Baishakh':1, 
    'Jestha': 2, 
    'Ashadh': 3, 
    'Shrawan': 4, 
    'Bhadra': 5,
    'Ashwin': 6,
    'Kartik': 7, 
    'Mangsir': 8, 
    'Poush': 9, 
    'Magh': 10, 
    'Falgun': 11, 
    'Chaitra': 12
       })

In [198]:
df['nepali_date'] = df.year.astype(str) + '-' + df.month.astype(str) + '-' + df.np.astype(str)

In [199]:
df.head()

Unnamed: 0,np,en,tithi,event,day,specialday,holiday,month,year,nepali_date
841,1,14,चतुर्दशी,nepali new year,mon,False,True,1,2071,2071-1-1
842,2,15,पुर्णिमा,no event,tue,False,False,1,2071,2071-1-2
843,3,16,प्रतिपदा,no event,wed,False,False,1,2071,2071-1-3
844,4,17,दृतीया,no event,thu,False,False,1,2071,2071-1-4
845,5,18,तृतीया,no event,fri,False,False,1,2071,2071-1-5


In [200]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 728 entries, 841 to 1675
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   np           728 non-null    object
 1   en           728 non-null    object
 2   tithi        728 non-null    object
 3   event        728 non-null    object
 4   day          728 non-null    object
 5   specialday   728 non-null    bool  
 6   holiday      728 non-null    bool  
 7   month        728 non-null    int64 
 8   year         728 non-null    int64 
 9   nepali_date  728 non-null    object
dtypes: bool(2), int64(2), object(6)
memory usage: 52.6+ KB


In [201]:
df.np = df.np.astype(int)

In [202]:
from nepali.date_converter import converter

en_year, en_month, en_date = converter.nepali_to_english(2082, 3,1)
print(en_year, en_month, en_date)


2025 6 15


In [210]:
def convert_date_locale(x):
    en_year, en_month, en_date = converter.nepali_to_english(x['year'], x['month'], x['np'])
    return str(en_year) + '-' + str(en_month) + '-' + str(en_date)

df['english_date'] = df.apply(convert_date_locale, axis=1)

In [211]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 728 entries, 841 to 1675
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   np            728 non-null    int32 
 1   en            728 non-null    object
 2   tithi         728 non-null    object
 3   event         728 non-null    object
 4   day           728 non-null    object
 5   specialday    728 non-null    bool  
 6   holiday       728 non-null    bool  
 7   month         728 non-null    int64 
 8   year          728 non-null    int64 
 9   nepali_date   728 non-null    object
 10  english_date  728 non-null    object
dtypes: bool(2), int32(1), int64(2), object(6)
memory usage: 55.5+ KB


In [212]:
df.head()

Unnamed: 0,np,en,tithi,event,day,specialday,holiday,month,year,nepali_date,english_date
841,1,14,चतुर्दशी,nepali new year,mon,False,True,1,2071,2071-1-1,2014-4-14
842,2,15,पुर्णिमा,no event,tue,False,False,1,2071,2071-1-2,2014-4-15
843,3,16,प्रतिपदा,no event,wed,False,False,1,2071,2071-1-3,2014-4-16
844,4,17,दृतीया,no event,thu,False,False,1,2071,2071-1-4,2014-4-17
845,5,18,तृतीया,no event,fri,False,False,1,2071,2071-1-5,2014-4-18


In [213]:
df = df[['nepali_date', 'english_date', 'day', 'specialday', 'holiday', 'event']]

In [215]:
df.day.unique()

array(['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'], dtype=object)

In [217]:
df.loc[df['day'] == 'sat'].holiday = True

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[df['day'] == 'sat'].holiday = True


In [225]:
df.loc[df.day == 'sat', 'holiday'] = True

In [226]:
df.loc[df.day == 'sat'].head()

Unnamed: 0,nepali_date,english_date,day,specialday,holiday,event
846,2071-1-6,2014-4-19,sat,False,True,no event
853,2071-1-13,2014-4-26,sat,True,True,no event
860,2071-1-20,2014-5-3,sat,True,True,no event
867,2071-1-27,2014-5-10,sat,False,True,no event
881,2071-2-3,2014-5-17,sat,True,True,no event


In [227]:
df.to_csv('./holiday_data_combined.csv', index=False)