In [6]:
import pandas as pd


# Solar Hijri month names in calendar order; the entry at index i is month
# number i + 1.
months = [
    "فروردین",
    "اردیبهشت",
    "خرداد",
    "تیر",
    "مرداد",
    "شهریور",
    "مهر",
    "آبان",
    "آذر",
    "دی",
    "بهمن",
    "اسفند",
]

# Persian words for the numbers needed to spell out a year: units and teens,
# the tens, the hundreds, and the word for one thousand.
persian_numbers = {
    1: "یک", 2: "دو", 3: "سه", 4: "چهار", 5: "پنج",
    6: "شش", 7: "هفت", 8: "هشت", 9: "نه", 10: "ده",
    11: "یازده", 12: "دوازده", 13: "سیزده", 14: "چهارده", 15: "پانزده",
    16: "شانزده", 17: "هفده", 18: "هجده", 19: "نوزده", 20: "بیست",
    30: "سی", 40: "چهل", 50: "پنجاه", 60: "شصت", 70: "هفتاد", 80: "هشتاد", 90: "نود",
    100: "صد", 200: "دویست", 300: "سیصد",
    # 400 keeps the two-word spelling this module has always emitted, so the
    # phrases generated for 14xx years stay byte-identical.
    400: "چهار صد",
    500: "پانصد", 600: "ششصد", 700: "هفتصد", 800: "هشتصد", 900: "نهصد",
    1000: "هزار",
}


def convert_year_to_persian(year):
    """Spell out a year in Persian words.

    The thousands, hundreds and tens/units parts are looked up in
    ``persian_numbers`` and joined with the conjunction " و ". Parts that are
    zero are left out, so a year such as 1400 no longer ends with a dangling
    conjunction.

    Args:
        year: Integer year in the range 1..9999.

    Returns:
        The year written in Persian words, e.g. 1401 -> "هزار و چهار صد و یک".

    Raises:
        ValueError: If ``year`` is outside 1..9999.
    """
    if not 1 <= year <= 9999:
        raise ValueError(f"year must be between 1 and 9999, got {year!r}")

    thousands_digit, below_thousand = divmod(year, 1000)
    hundreds_digit, last_two_digits = divmod(below_thousand, 100)

    parts = []

    if thousands_digit == 1:
        # One thousand is the bare word, without a leading "one".
        parts.append(persian_numbers[1000])
    elif thousands_digit:
        parts.append(f"{persian_numbers[thousands_digit]} {persian_numbers[1000]}")

    if hundreds_digit:
        parts.append(persian_numbers[hundreds_digit * 100])

    if last_two_digits:
        if last_two_digits <= 20:
            # 1..20 each have their own word in the table.
            parts.append(persian_numbers[last_two_digits])
        else:
            tens_digit, units = divmod(last_two_digits, 10)
            parts.append(persian_numbers[tens_digit * 10])
            if units:
                parts.append(persian_numbers[units])

    return " و ".join(parts)


def generate_date_mappings_with_persian_year(start_year, end_year):
    """Build a table mapping informal Persian date phrases to formal dates.

    For every day of every year in ``start_year..end_year`` (inclusive) three
    informal phrasings are produced, always in this order:

    1. ``"<day> <month> <year words>"``
    2. ``"روز <day> <month> <year words>"``
    3. ``"<ordinal day word> <month> <year words>"``

    Each phrase is paired with the formal string ``"YYYY-MM-DD"`` built from
    the numeric year, month and day. Rows are emitted in a fixed order, so
    repeated runs produce identical output.

    Args:
        start_year: First year to generate (inclusive).
        end_year: Last year to generate (inclusive).

    Returns:
        A ``pandas.DataFrame`` with the string columns ``informal`` and
        ``formal``.
    """
    # Ordinal day words; index 0 corresponds to day 1.
    ordinal_days = (
        "اول", "دوم", "سوم", "چهارم", "پنجم",
        "ششم", "هفتم", "هشتم", "نهم", "دهم",
        "یازدهم", "دوازدهم", "سیزدهم", "چهاردهم", "پانزدهم",
        "شانزدهم", "هفدهم", "هجدهم", "نوزدهم", "بیستم",
        "بیست و یکم", "بیست و دوم", "بیست و سوم", "بیست و چهارم", "بیست و پنجم",
        "بیست و ششم", "بیست و هفتم", "بیست و هشتم", "بیست و نهم", "سی ام",
        "سی و یکم",
    )
    # Leap years by the common 33-year arithmetic rule for the Solar Hijri
    # calendar (1403 and 1408 are leap under it).
    # NOTE(review): this is an arithmetic approximation of the official
    # calendar; confirm it before generating years far from the current era.
    leap_remainders = (1, 5, 9, 13, 17, 22, 26, 30)

    informal_dates = []
    formal_dates = []

    for year in range(start_year, end_year + 1):
        year_in_persian = convert_year_to_persian(year)
        is_leap_year = year % 33 in leap_remainders

        for month_idx, month in enumerate(months):
            # First six months have 31 days, the next five have 30, and the
            # last month has 29 days (30 in a leap year).
            if month_idx < 6:
                days_in_month = 31
            elif month_idx < 11 or is_leap_year:
                days_in_month = 30
            else:
                days_in_month = 29

            for day in range(1, days_in_month + 1):
                formal = f"{year:04d}-{month_idx+1:02d}-{day:02d}"
                # An ordered tuple (instead of a set) keeps the row order
                # deterministic; the three phrasings are always distinct.
                phrasings = (
                    f"{day} {month} {year_in_persian}",
                    f"روز {day} {month} {year_in_persian}",
                    f"{ordinal_days[day - 1]} {month} {year_in_persian}",
                )
                for informal in phrasings:
                    informal_dates.append(informal)
                    formal_dates.append(formal)

    return pd.DataFrame({"informal": informal_dates, "formal": formal_dates})


# Build the mapping table for the years 1400 through 1410 (inclusive).
dataset = generate_date_mappings_with_persian_year(start_year=1400, end_year=1410)

# Write the table to disk, leaving out the DataFrame index column.
dataset.to_csv(path_or_buf="solar_date_dataset_persian_year.csv", index=False)


In [None]:
# Function to generate informal to formal date mapping with Persian year words
def generate_date_mappings_with_persian_year(start_year, end_year):
    """Build a table mapping informal Persian date phrases to formal dates.

    For every day of every year in ``start_year..end_year`` (inclusive) three
    informal phrasings are produced, always in this order:

    1. ``"<day> <month> <year words>"``
    2. ``"روز <day> <month> <year words>"``
    3. ``"<ordinal day word> <month> <year words>"``

    Each phrase is paired with the formal string ``"YYYY-MM-DD"`` built from
    the numeric year, month and day. Rows are emitted in a fixed order, so
    repeated runs produce identical output.

    Args:
        start_year: First year to generate (inclusive).
        end_year: Last year to generate (inclusive).

    Returns:
        A ``pandas.DataFrame`` with the string columns ``informal`` and
        ``formal``.
    """
    # Ordinal day words; index 0 corresponds to day 1.
    ordinal_days = (
        "اول", "دوم", "سوم", "چهارم", "پنجم",
        "ششم", "هفتم", "هشتم", "نهم", "دهم",
        "یازدهم", "دوازدهم", "سیزدهم", "چهاردهم", "پانزدهم",
        "شانزدهم", "هفدهم", "هجدهم", "نوزدهم", "بیستم",
        "بیست و یکم", "بیست و دوم", "بیست و سوم", "بیست و چهارم", "بیست و پنجم",
        "بیست و ششم", "بیست و هفتم", "بیست و هشتم", "بیست و نهم", "سی ام",
        "سی و یکم",
    )
    # Leap years by the common 33-year arithmetic rule for the Solar Hijri
    # calendar (1403 and 1408 are leap under it).
    # NOTE(review): this is an arithmetic approximation of the official
    # calendar; confirm it before generating years far from the current era.
    leap_remainders = (1, 5, 9, 13, 17, 22, 26, 30)

    informal_dates = []
    formal_dates = []

    for year in range(start_year, end_year + 1):
        year_in_persian = convert_year_to_persian(year)
        is_leap_year = year % 33 in leap_remainders

        for month_idx, month in enumerate(months):
            # First six months have 31 days, the next five have 30, and the
            # last month has 29 days (30 in a leap year).
            if month_idx < 6:
                days_in_month = 31
            elif month_idx < 11 or is_leap_year:
                days_in_month = 30
            else:
                days_in_month = 29

            for day in range(1, days_in_month + 1):
                formal = f"{year:04d}-{month_idx+1:02d}-{day:02d}"
                # An ordered tuple (instead of a set) keeps the row order
                # deterministic; the three phrasings are always distinct.
                phrasings = (
                    f"{day} {month} {year_in_persian}",
                    f"روز {day} {month} {year_in_persian}",
                    f"{ordinal_days[day - 1]} {month} {year_in_persian}",
                )
                for informal in phrasings:
                    informal_dates.append(informal)
                    formal_dates.append(formal)

    return pd.DataFrame({"informal": informal_dates, "formal": formal_dates})

In [1]:
import pandas as pd

In [6]:
# Load the two previously written CSV files.
data1 = pd.read_csv(filepath_or_buffer="solar_date_dataset_persian_year.csv")
data2 = pd.read_csv(filepath_or_buffer="solar_date_dataset_no_duplicates.csv")

# Stack both tables row-wise into a single DataFrame.
data = pd.concat(objs=[data1, data2])

In [8]:
# Write the combined table to disk, leaving out the DataFrame index column.
data.to_csv(path_or_buf="date.csv", index=False)