In [1]:
import pandas as pd

# Load the CSV file
df = pd.read_csv("SunS.csv")  # Update with your file path

# Convert from wide to long format
df_long = df.melt(id_vars=["Year", "Month"], var_name="Day", value_name="Sun Shine")

# Convert Day to integer
df_long["Day"] = df_long["Day"].astype(int)

# Drop rows with missing Sun Shine (e.g. 31 Feb, 30 Apr etc.)
df_long = df_long.dropna(subset=["Sun Shine"])

# Create proper Date column in DD/MM/YYYY format
df_long["Date"] = df_long["Day"].astype(str).str.zfill(2) + "/" + \
                  df_long["Month"].astype(str).str.zfill(2) + "/" + \
                  df_long["Year"].astype(str)

# Final output
df_final = df_long[["Date", "Sun Shine"]].reset_index(drop=True)

# Save to file (optional)
df_final.to_csv("Sun Shine_Daily_Distribution.csv", index=False)

# Show preview
print(df_final.head(15))


          Date Sun Shine
0   01/01/1981      ****
1   01/02/1981      ****
2   01/03/1981      10.1
3   01/04/1981       2.1
4   01/05/1981       9.3
5   01/06/1981       4.1
6   01/07/1981         0
7   01/08/1981      ****
8   01/09/1981      ****
9   01/10/1981         8
10  01/11/1981      ****
11  01/12/1981      ****
12  01/01/1982       7.9
13  01/02/1982       4.7
14  01/03/1982       4.5


In [2]:
import pandas as pd

df = pd.read_csv("Sun Shine_Daily_Distribution.csv")


In [3]:
print(df.head(10))


         Date Sun Shine
0  01/01/1981      ****
1  01/02/1981      ****
2  01/03/1981      10.1
3  01/04/1981       2.1
4  01/05/1981       9.3
5  01/06/1981       4.1
6  01/07/1981         0
7  01/08/1981      ****
8  01/09/1981      ****
9  01/10/1981         8


In [4]:
df["Date"] = pd.to_datetime(df["Date"], format="%d/%m/%Y")
df = df.sort_values("Date").reset_index(drop=True)
df["Date"] = df["Date"].dt.strftime("%d/%m/%Y")


In [5]:
df

Unnamed: 0,Date,Sun Shine
0,01/01/1981,****
1,02/01/1981,****
2,03/01/1981,****
3,04/01/1981,****
4,05/01/1981,****
...,...,...
4378,27/12/1994,6.3
4379,28/12/1994,6.5
4380,29/12/1994,8.1
4381,30/12/1994,7.5


In [6]:
df.to_csv("Sun Shine_Daily_Distribution_Fixed.csv", index=False)


In [7]:
# Check for missing values
missing_summary = df.isnull().sum()

# Show where missing values exist
print(missing_summary)

Date         0
Sun Shine    0
dtype: int64


In [8]:
# Check if any cell contains '***'
contains_stars = df.applymap(lambda x: '****' in str(x)).any().any()

if contains_stars:
    print("CSV file er kono cell e '****' ache.")
else:
    print("CSV file er kono cell e '****' nei.")

CSV file er kono cell e '****' ache.


  contains_stars = df.applymap(lambda x: '****' in str(x)).any().any()


In [9]:
df

Unnamed: 0,Date,Sun Shine
0,01/01/1981,****
1,02/01/1981,****
2,03/01/1981,****
3,04/01/1981,****
4,05/01/1981,****
...,...,...
4378,27/12/1994,6.3
4379,28/12/1994,6.5
4380,29/12/1994,8.1
4381,30/12/1994,7.5


In [11]:
# Je cells e '***' ache segular index list ber korbo
cells_with_stars = []

for row_idx, row in df.iterrows():
    for col in df.columns:
        cell_value = str(row[col])
        if '***' in cell_value:
            cells_with_stars.append((row_idx, col))

if cells_with_stars:
    print("Cells with '***' found at:")
    for cell in cells_with_stars:
        print(f"Row: {cell[0]}, Column: {cell[1]}")
else:
    print("No cells with '***' found.")

Cells with '***' found at:
Row: 0, Column: Sun Shine
Row: 1, Column: Sun Shine
Row: 2, Column: Sun Shine
Row: 3, Column: Sun Shine
Row: 4, Column: Sun Shine
Row: 5, Column: Sun Shine
Row: 6, Column: Sun Shine
Row: 7, Column: Sun Shine
Row: 8, Column: Sun Shine
Row: 9, Column: Sun Shine
Row: 10, Column: Sun Shine
Row: 11, Column: Sun Shine
Row: 12, Column: Sun Shine
Row: 13, Column: Sun Shine
Row: 14, Column: Sun Shine
Row: 15, Column: Sun Shine
Row: 16, Column: Sun Shine
Row: 17, Column: Sun Shine
Row: 18, Column: Sun Shine
Row: 19, Column: Sun Shine
Row: 20, Column: Sun Shine
Row: 21, Column: Sun Shine
Row: 22, Column: Sun Shine
Row: 23, Column: Sun Shine
Row: 24, Column: Sun Shine
Row: 25, Column: Sun Shine
Row: 26, Column: Sun Shine
Row: 27, Column: Sun Shine
Row: 28, Column: Sun Shine
Row: 29, Column: Sun Shine
Row: 30, Column: Sun Shine
Row: 31, Column: Sun Shine
Row: 32, Column: Sun Shine
Row: 33, Column: Sun Shine
Row: 34, Column: Sun Shine
Row: 35, Column: Sun Shine
Row: 36, Co

In [12]:
# 'Sun Shine' column e '***' ke NaN replace koro
df['Sun Shine'] = df['Sun Shine'].replace('***', pd.NA)

# 'Sun Shine' column ke numeric convert koro (NaN thakbe jekhane '*** chilo)
df['Sun Shine'] = pd.to_numeric(df['Sun Shine'], errors='coerce')

# Mean calculate koro (NaN ignore kore)
mean_val = df['Sun Shine'].mean()

# NaN gula ke mean diye replace koro
df['Sun Shine'].fillna(mean_val, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Sun Shine'].fillna(mean_val, inplace=True)


In [13]:
df

Unnamed: 0,Date,Sun Shine
0,01/01/1981,6.57025
1,02/01/1981,6.57025
2,03/01/1981,6.57025
3,04/01/1981,6.57025
4,05/01/1981,6.57025
...,...,...
4378,27/12/1994,6.30000
4379,28/12/1994,6.50000
4380,29/12/1994,8.10000
4381,30/12/1994,7.50000


In [14]:
# Je cells e '***' ache segular index list ber korbo
cells_with_stars = []

for row_idx, row in df.iterrows():
    for col in df.columns:
        cell_value = str(row[col])
        if '***' in cell_value:
            cells_with_stars.append((row_idx, col))

if cells_with_stars:
    print("Cells with '***' found at:")
    for cell in cells_with_stars:
        print(f"Row: {cell[0]}, Column: {cell[1]}")
else:
    print("No cells with '***' found.")

No cells with '***' found.


In [15]:
# Replace sesh porjonto file save korte paro
df.to_csv("Sun Shine.csv", index=False)

In [16]:
df

Unnamed: 0,Date,Sun Shine
0,01/01/1981,6.57025
1,02/01/1981,6.57025
2,03/01/1981,6.57025
3,04/01/1981,6.57025
4,05/01/1981,6.57025
...,...,...
4378,27/12/1994,6.30000
4379,28/12/1994,6.50000
4380,29/12/1994,8.10000
4381,30/12/1994,7.50000
