In [283]:
import pandas as pd
import re
import numpy as np

In [284]:
def clean_value(x):
    if pd.isna(x):
        return None
    x = str(x)

    # 1) Take only first part if "69;23" → "69"
    if ";" in x:
        x = x.split(";")[0]

    # 2) Replace comma decimal → dot (16,52 → 16.52)
    x = x.replace(",", ".")

    # 3) Remove spaces
    x = x.strip()

    # 4) Convert to numeric
    try:
        return float(x)
    except:
        return None


In [285]:
coffee_monthly = (
    coffee_yearly
    .set_index("date")
    .groupby("country")["production_tons"]
    .resample("ME")
    .ffill()
    .reset_index()
)
coffee_monthly = coffee_monthly[["date", "country", "production_tons"]]
coffee_monthly.head()

Unnamed: 0,date,country,production_tons
0,1961-01-31,Brazil,2228704.0
1,1961-02-28,Brazil,2228704.0
2,1961-03-31,Brazil,2228704.0
3,1961-04-30,Brazil,2228704.0
4,1961-05-31,Brazil,2228704.0


In [286]:
def clean_value(x):
    """'30,87' / '235' / '69;23' gibi değerleri float'a çevirir."""
    if pd.isna(x):
        return None
    x = str(x)
    # "69;23" → "69"
    if ";" in x:
        x = x.split(";")[0]
    # 30,87 → 30.87
    x = x.replace(",", ".").strip()
    try:
        return float(x)
    except:
        return None


In [287]:
def process_climate_excel(path, country_name):
    df = pd.read_excel(path)

    # Sadece ay satırlarını al (Jan, Feb, ..., Dec)
    months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

    monthly = df[df["Observed Seasonal Cycle"].isin(months)].copy()

    # Ay isimleri
    monthly["Month"] = monthly["Observed Seasonal Cycle"]

    # Ortalama sıcaklık ve yağışı temizle
    monthly["avg_temp"] = monthly["Unnamed: 2"].apply(clean_value)
    monthly["rainfall"] = monthly["Unnamed: 4"].apply(clean_value)

    # Ay ismi → ay numarası
    monthly["Month"] = pd.to_datetime(monthly["Month"], format="%b").dt.month

    # Bu dosyalar aslında 1991–2020 ortalaması → tek "örnek yıl" veriyoruz
    monthly["Year"] = 2020

    # date sütunu
    monthly["date"] = pd.to_datetime(
        dict(year=monthly["Year"], month=monthly["Month"], day=1)
    )

    monthly["country"] = country_name

    return monthly[["date", "country", "avg_temp", "rainfall"]]


In [288]:
brazil_climate   = process_climate_excel(r"C:\Users\hp\OneDrive\Belgeler\DSA210_Project\data\brazil.xlsx",   "Brazil")
colombia_climate = process_climate_excel(r"C:\Users\hp\OneDrive\Belgeler\DSA210_Project\data\colombia.xlsx", "Colombia")
indo_climate     = process_climate_excel(r"C:\Users\hp\OneDrive\Belgeler\DSA210_Project\data\indonesia.xlsx","Indonesia")

climate_all = pd.concat(
    [brazil_climate, colombia_climate, indo_climate],
    ignore_index=True
)

print(climate_all)
print(climate_all["country"].value_counts())


FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\hp\\OneDrive\\Belgeler\\DSA210_Project\\data\\brazil.xlsx'

In [None]:
# 1) FAOSTAT Excel dosyasını oku
coffee_raw = pd.read_excel(r"C:\Users\hp\OneDrive\Belgeler\DSA210_Project\data\FAOSTAT_coffee.xlsx")

# 2) Kullanacağımız ülkeler
target_countries = ["Brazil", "Colombia", "Indonesia"]

# 3) Sadece Coffee, green + Production satırlarını al
coffee = coffee_raw[
    (coffee_raw["Item"] == "Coffee, green") &
    (coffee_raw["Element"] == "Production") &
    (coffee_raw["Area"].isin(target_countries))
]

# 4) Year → datetime
coffee["date"] = pd.to_datetime(coffee["Year"].astype(int), format="%Y")

# 5) Kolon isimlerini düzenle
coffee.rename(columns={"Area": "country", "Value": "production_tons"}, inplace=True)

# 6) Yıllık tidy tablo
coffee_yearly = coffee[["date", "country", "production_tons"]]

# 7) Yıllık → aylık panel
coffee_monthly = (
    coffee_yearly
    .set_index("date")
    .groupby("country")["production_tons"]
    .resample("ME")
    .ffill()
    .reset_index()
)

print(coffee_monthly.head())


  country       date  production_tons
0  Brazil 1961-01-31        2228704.0
1  Brazil 1961-02-28        2228704.0
2  Brazil 1961-03-31        2228704.0
3  Brazil 1961-04-30        2228704.0
4  Brazil 1961-05-31        2228704.0


In [None]:

# 1) Climate base year: 2020 verisini climate_all içinde tutuyoruz
climate_base = climate_all.copy()

# 2) Climate base verisinden AY numarasını çıkarıyoruz
climate_base["month"] = climate_base["date"].dt.month

# 3) Yeni yıllar aralığı belirle
years = range(2010, 2024)   # 2010–2023

expanded_rows = []

for year in years:
    temp = climate_base.copy()
    temp["date"] = pd.to_datetime({
        "year": year,
        "month": temp["month"],
        "day": 1
    })
    expanded_rows.append(temp)

# 4) Birleştir → yeni genişletilmiş climate veri seti
climate_all_expanded = pd.concat(expanded_rows, ignore_index=True)

# 5) Gereksiz kolonları temizle
climate_all_expanded = climate_all_expanded[["date", "country", "avg_temp", "rainfall"]]

print(climate_all_expanded.head(20))
print(climate_all_expanded.tail(20))
print(climate_all_expanded["country"].value_counts())


         date   country  avg_temp  rainfall
0  2010-01-01    Brazil     26.05    235.00
1  2010-02-01    Brazil     25.98    225.22
2  2010-03-01    Brazil     25.85    240.36
3  2010-04-01    Brazil     25.55    188.05
4  2010-05-01    Brazil     24.65    135.21
5  2010-06-01    Brazil     23.96     84.44
6  2010-07-01    Brazil     23.86     66.10
7  2010-08-01    Brazil     24.86     53.26
8  2010-09-01    Brazil     25.87     73.34
9  2010-10-01    Brazil     26.50    117.39
10 2010-11-01    Brazil     26.23    159.80
11 2010-12-01    Brazil     26.22    199.16
12 2010-01-01  Colombia     25.21    102.47
13 2010-02-01  Colombia     25.48    116.48
14 2010-03-01  Colombia     25.59    181.87
15 2010-04-01  Colombia     25.40    244.59
16 2010-05-01  Colombia     24.96    299.72
17 2010-06-01  Colombia     24.64    280.55
18 2010-07-01  Colombia     24.47    264.26
19 2010-08-01  Colombia     24.70    233.13
          date    country  avg_temp  rainfall
484 2023-05-01   Colombia     

In [None]:

stores = pd.read_csv(r"C:\Users\hp\OneDrive\Belgeler\DSA210_Project\data\directory.csv")


target_codes = {"BR": "Brazil", "CO": "Colombia", "ID": "Indonesia"}


stores_filtered = stores[stores["Country"].isin(target_codes.keys())].copy()


stores_filtered["country"] = stores_filtered["Country"].map(target_codes)


store_counts = stores_filtered.groupby("country").size().reset_index(name="store_count")

print(store_counts)


     country  store_count
0     Brazil          102
1   Colombia           11
2  Indonesia          268


In [None]:

store_counts = pd.DataFrame({
    "country": ["Brazil", "Colombia", "Indonesia"],
    "store_count": [102, 11, 268]
})


date_range = pd.date_range("2010-01-01", "2023-12-01", freq="MS")

df_list = []
for _, row in store_counts.iterrows():
    df = pd.DataFrame({
        "date": date_range,
        "country": row["country"],
        "store_count": row["store_count"]
    })
    df_list.append(df)

stores_monthly = pd.concat(df_list, ignore_index=True)

stores_monthly


Unnamed: 0,date,country,store_count
0,2010-01-01,Brazil,102
1,2010-02-01,Brazil,102
2,2010-03-01,Brazil,102
3,2010-04-01,Brazil,102
4,2010-05-01,Brazil,102
...,...,...,...
499,2023-08-01,Indonesia,268
500,2023-09-01,Indonesia,268
501,2023-10-01,Indonesia,268
502,2023-11-01,Indonesia,268


In [None]:
# import pandas as pd

# prices = pd.read_csv("PCOFFOTMUSDM.csv")

# # FRED formatından gelen kolon ismi
# date_col = None
# for c in prices.columns:
#     if c.lower() in ["date", "observation_date", "observationdate"]:
#         date_col = c
#         break

# prices["date"] = pd.to_datetime(prices[date_col])
# prices["coffee_price"] = prices.iloc[:, 1]   # 2. kolon fiyat

# # Gereksiz kolonlar temizle
# prices = prices[["date", "coffee_price"]]

# print("FRED PRICE TABLE READY ✔")
# print(prices.head())


In [None]:
# prices = pd.read_csv(r"C:\Users\hp\OneDrive\Belgeler\DSA210_Project\data\PCOFFOTMUSDM.csv")

# # FRED formatından gelen kolon ismi
# date_col = None
# for c in prices.columns:
#     if c.lower() in ["date", "observation_date", "observationdate"]:
#         date_col = c
#         break

# prices["date"] = pd.to_datetime(prices[date_col])
# prices["coffee_price"] = prices.iloc[:, 1]   # 2. kolon fiyat

# # Gereksiz kolonlar temizle
# prices = prices[["date", "coffee_price"]]

# panel = (
#     coffee_monthly
#     .merge(climate_all_expanded, on=["date", "country"], how="left")
#     .merge(prices, on="date", how="left")
#     .merge(stores_monthly, on=["date", "country"], how="left")
# )
# panel = panel[panel["date"].dt.year >= 2010]
# panel

Unnamed: 0,country,date,production_tons,avg_temp,rainfall,coffee_price,store_count
588,Brazil,2010-01-31,2907265.00,,,,
589,Brazil,2010-02-28,2907265.00,,,,
590,Brazil,2010-03-31,2907265.00,,,,
591,Brazil,2010-04-30,2907265.00,,,,
592,Brazil,2010-05-31,2907265.00,,,,
...,...,...,...,...,...,...,...
2230,Indonesia,2022-09-30,774960.53,,,,
2231,Indonesia,2022-10-31,774960.53,,,,
2232,Indonesia,2022-11-30,774960.53,,,,
2233,Indonesia,2022-12-31,774960.53,,,,


In [None]:

df = pd.read_excel(r"C:\Users\hp\OneDrive\Belgeler\DSA210_Project\data\PCOFFROBUSDM_indonesia.xlsx")

# 2) Rename columns properly
df = df.rename(columns={
    "observation_date": "date",
    "PCOFFROBUSDM": "coffee_price"
})

# 3) Convert date column
df["date"] = pd.to_datetime(df["date"])

df = df[df["date"].dt.year >= 2000]
# 4) Add country
df["country"] = "Indonesia"

# 5) Keep only needed columns
indonesia_price = df[["date", "country", "coffee_price"]]

# 6) Preview
print(indonesia_price.head())
print(indonesia_price.tail())

          date    country  coffee_price
120 2000-01-01  Indonesia     54.068571
121 2000-02-01  Indonesia     48.987143
122 2000-03-01  Indonesia     46.255217
123 2000-04-01  Indonesia     44.561000
124 2000-05-01  Indonesia     43.742174
          date    country  coffee_price
421 2025-02-01  Indonesia    263.045000
422 2025-03-01  Indonesia    257.684286
423 2025-04-01  Indonesia    246.549091
424 2025-05-01  Indonesia    237.303182
425 2025-06-01  Indonesia    196.213810


In [None]:
panel = panel.merge(indonesia_price, on=["date", "country"], how="left")


In [None]:
col_raw = pd.read_excel(r"C:\Users\hp\OneDrive\Belgeler\DSA210_Project\data\coffee_prices_colombia.xlsx")

# 2) Kolon isimlerini düzenle
col_raw = col_raw.rename(columns={
    "Año": "year",
    "Precio externo": "coffee_price"
})

# 3) 2000 ve sonrası
col_raw = col_raw[col_raw["year"] >= 2000]

# 4) Yılı datetime'a çevir (yıllık)
col_raw["date"] = pd.to_datetime(col_raw["year"].astype(int), format="%Y")

# 5) Ülke ekle
col_raw["country"] = "Colombia"

# 6) Yıllık → aylık forward-fill
col_price_monthly = (
    col_raw[["date", "country", "coffee_price"]]
    .set_index("date")
    .groupby("country")["coffee_price"]
    .resample("ME")
    .ffill()
    .reset_index()
)

print(col_price_monthly.head())


    country       date  coffee_price
0  Colombia 2000-01-31    102.290833
1  Colombia 2000-02-29    102.290833
2  Colombia 2000-03-31    102.290833
3  Colombia 2000-04-30    102.290833
4  Colombia 2000-05-31    102.290833


In [None]:
# 1) Brazil dosyasında header 4. satırda, o yüzden:
bra_raw = pd.read_excel(r"C:\Users\hp\OneDrive\Belgeler\DSA210_Project\data\coffee_prices_brazil.xlsx", header=3)

# 2) Kolon isimlerini düzenle
bra_raw = bra_raw.rename(columns={
    "Date": "date",
    "Price US$": "coffee_price"
})

# 3) Tarihi datetime'a çevir
# 09/02/1996 formatı büyük ihtimalle mm/dd/yyyy → dayfirst=False
bra_raw["date"] = pd.to_datetime(bra_raw["date"], dayfirst=False, errors="coerce")

# 4) Tarihi okunamayan satırları at
bra_raw = bra_raw.dropna(subset=["date"])

# 5) 2000 ve sonrası
bra_raw = bra_raw[bra_raw["date"].dt.year >= 2000]

# 6) Ülke ekle
bra_raw["country"] = "Brazil"

# 7) Günlük → aylık ortalama fiyat
bra_price_monthly = (
    bra_raw
    .set_index("date")
    .groupby("country")["coffee_price"]
    .resample("ME")
    .mean()
    .reset_index()
)

print(bra_price_monthly.head())


  country       date  coffee_price
0  Brazil 2000-01-31    123.809524
1  Brazil 2000-02-29    111.148571
2  Brazil 2000-03-31    111.482381
3  Brazil 2000-04-30    101.897895
4  Brazil 2000-05-31     98.093182


In [None]:
prices_all = pd.concat(
    [bra_price_monthly, col_price_monthly, indonesia_price],
    ignore_index=True
)

print(prices_all.head())
print(prices_all["country"].value_counts())


  country       date  coffee_price
0  Brazil 2000-01-31    123.809524
1  Brazil 2000-02-29    111.148571
2  Brazil 2000-03-31    111.482381
3  Brazil 2000-04-30    101.897895
4  Brazil 2000-05-31     98.093182
country
Brazil       311
Indonesia    306
Colombia     289
Name: count, dtype: int64


Unnamed: 0,date,country,production_tons,coffee_price,avg_temp,rainfall,store_count
0,1961-01-01,Brazil,2228704.00,,,,
1,1961-02-01,Brazil,2228704.00,,,,
2,1961-03-01,Brazil,2228704.00,,,,
3,1961-04-01,Brazil,2228704.00,,,,
4,1961-05-01,Brazil,2228704.00,,,,
...,...,...,...,...,...,...,...
2230,2022-09-01,Indonesia,774960.53,111.358182,,,268.0
2231,2022-10-01,Indonesia,774960.53,103.011905,,,268.0
2232,2022-11-01,Indonesia,774960.53,92.595000,,,268.0
2233,2022-12-01,Indonesia,774960.53,92.435000,,,268.0


In [295]:
def clean_climate(path, country_name):
    df = pd.read_excel(path)

    # Correct temperature and rainfall columns
    temp_col = [c for c in df.columns if "Average Mean Surface Air Temperature" in c][0]
    rain_col = [c for c in df.columns if "Precipitation" in c][0]

    df["avg_temp"] = df[temp_col]
    df["rainfall"] = df[rain_col]

    # Fix Month formatting
    df["Month"] = df["Month"].astype(str).str[:3]

    # Convert Month to number
    df["Month_num"] = pd.to_datetime(df["Month"], format="%b").dt.month

    # Create all years (2000–2023)
    years = list(range(2000, 2024))
    df_list = []

    for y in years:
        temp_df = df.copy()
        temp_df["Year"] = y
        df_list.append(temp_df)

    df_all = pd.concat(df_list, ignore_index=True)

    # Build date
    df_all["date"] = pd.to_datetime({
        "year": df_all["Year"],
        "month": df_all["Month_num"],
        "day": 1
    })

    df_all["country"] = country_name

    return df_all[["date", "country", "avg_temp", "rainfall"]]



brazil_climate = clean_climate(r"C:\Users\hp\OneDrive\Belgeler\DSA210_Project\data\brazil_weather.xlsx", "Brazil")
colombia_climate = clean_climate(r"C:\Users\hp\OneDrive\Belgeler\DSA210_Project\data\colombia_weather.xlsx", "Colombia")
indonesia_climate = clean_climate(r"C:\Users\hp\OneDrive\Belgeler\DSA210_Project\data\indonesia_weather.xlsx", "Indonesia")

# Combine all
climate_all = pd.concat([brazil_climate, colombia_climate, indonesia_climate], ignore_index=True)


###############################################################################
# 2) CREATE STORE COUNT MONTHLY DATA (Brazil, Colombia, Indonesia)
###############################################################################

stores_raw = pd.DataFrame({
    "country": ["Brazil", "Colombia", "Indonesia"],
    "store_count": [102, 11, 268]
})

# Expand yearly -> monthly
store_months = pd.date_range(start="2000-01-01", end="2023-12-01", freq="MS")
store_list = []

for _, row in stores_raw.iterrows():
    for d in store_months:
        store_list.append([d, row["country"], row["store_count"]])

stores_monthly = pd.DataFrame(store_list, columns=["date", "country", "store_count"])


###############################################################################
# 3) FIX PRICE DATE FORMAT (Make date = first of month)
###############################################################################

prices_all["date"] = prices_all["date"].values.astype("datetime64[M]")


###############################################################################
# 4) FIX CLIMATE DATE FORMAT (already first day of month)
###############################################################################

climate_all["date"] = climate_all["date"].values.astype("datetime64[M]")


###############################################################################
# 5) FIX PANEL DATE FORMAT (first of month)
###############################################################################

panel["date"] = panel["date"].values.astype("datetime64[M]")


###############################################################################
# 6) FINAL MERGE (prices + climate + stores)
###############################################################################

panel = (
    panel
    .merge(prices_all, on=["date","country"], how="left")
    .merge(climate_all, on=["date","country"], how="left")
    .merge(stores_monthly, on=["date","country"], how="left")
)

panel.head()


IndexError: list index out of range

In [297]:
# coffee_monthly'den temiz bir base panel oluştur
panel_base = coffee_monthly[["date", "country", "production_tons"]].copy()
panel_base["date"] = panel_base["date"].values.astype("datetime64[M]")
prices_all["date"] = prices_all["date"].values.astype("datetime64[M]")
climate_all["date"] = climate_all["date"].values.astype("datetime64[M]")
stores_monthly["date"] = stores_monthly["date"].values.astype("datetime64[M]")
panel = (
    panel_base
    .merge(prices_all, on=["date", "country"], how="left")
    .merge(climate_all, on=["date", "country"], how="left")
    .merge(stores_monthly, on=["date", "country"], how="left")
)
panel


Unnamed: 0,date,country,production_tons,coffee_price,avg_temp,rainfall,store_count
0,1961-01-01,Brazil,2228704.00,,,,
1,1961-02-01,Brazil,2228704.00,,,,
2,1961-03-01,Brazil,2228704.00,,,,
3,1961-04-01,Brazil,2228704.00,,,,
4,1961-05-01,Brazil,2228704.00,,,,
...,...,...,...,...,...,...,...
2230,2022-09-01,Indonesia,774960.53,111.358182,,,268.0
2231,2022-10-01,Indonesia,774960.53,103.011905,,,268.0
2232,2022-11-01,Indonesia,774960.53,92.595000,,,268.0
2233,2022-12-01,Indonesia,774960.53,92.435000,,,268.0


In [300]:
# Panel’in 2000 öncesini at
panel = panel[panel["date"] >= "2000-01-01"].copy()

# Climate date zaten 2020–2023
# Price data 2000 sonrası
# Stores monthly 2000 sonrası

panel = (
    panel
    .merge(prices_all, on=["date", "country"], how="left")
    .merge(climate_all, on=["date", "country"], how="left")
    .merge(stores_monthly, on=["date", "country"], how="left")
)


In [301]:
print(panel.isna().sum())
print(panel.head(12))


date                 0
country              0
production_tons      0
coffee_price_x       0
avg_temp_x         795
rainfall_x         795
store_count_x      360
coffee_price_y       0
avg_temp_y         795
rainfall_y         795
store_count_y      360
dtype: int64
         date country  production_tons  coffee_price_x  avg_temp_x  \
0  2000-01-01  Brazil        1903562.0      123.809524         NaN   
1  2000-02-01  Brazil        1903562.0      111.148571         NaN   
2  2000-03-01  Brazil        1903562.0      111.482381         NaN   
3  2000-04-01  Brazil        1903562.0      101.897895         NaN   
4  2000-05-01  Brazil        1903562.0       98.093182         NaN   
5  2000-06-01  Brazil        1903562.0       87.000000         NaN   
6  2000-07-01  Brazil        1903562.0       83.407143         NaN   
7  2000-08-01  Brazil        1903562.0       76.175652         NaN   
8  2000-09-01  Brazil        1903562.0       74.663500         NaN   
9  2000-10-01  Brazil        19035

In [311]:
def clean_climate(path, country_name):
    # Read raw excel
    df_raw = pd.read_excel(path, header=None)

    # The real table starts at row 7:
    # Row 7 → column names
    # Row 8+ → data
    df = pd.read_excel(path, header=7)

    # Rename columns for consistency
    df = df.rename(columns={
        df.columns[0]: "Month",
        df.columns[1]: "avg_min_temp",
        df.columns[2]: "avg_temp",
        df.columns[3]: "avg_max_temp",
        df.columns[4]: "rainfall"
    })

    # Fix comma decimal separator (21,29 → 21.29)
    df["avg_temp"] = df["avg_temp"].astype(str).str.replace(",", ".").astype(float)
    df["rainfall"] = df["rainfall"].astype(str).str.replace(",", ".").astype(float)

    # Keep only useful columns
    df = df[["Month", "avg_temp", "rainfall"]]

    # Convert month names to numbers
    df["Month_num"] = pd.to_datetime(df["Month"], format="%b").dt.month

    # Expand from one-year climatology to 2000–2023
    years = list(range(2000, 2024))
    df_list = []

    for y in years:
        temp_df = df.copy()
        temp_df["Year"] = y
        df_list.append(temp_df)

    df_all = pd.concat(df_list, ignore_index=True)

    # Generate real monthly datetime
    df_all["date"] = pd.to_datetime({
        "year": df_all["Year"],
        "month": df_all["Month_num"],
        "day": 1
    })

    df_all["country"] = country_name

    return df_all[["date", "country", "avg_temp", "rainfall"]]
brazil_climate = clean_climate(r"C:\Users\hp\OneDrive\Belgeler\DSA210_Project\data\brazil_weather.xlsx", "Brazil")
colombia_climate = clean_climate(r"C:\Users\hp\OneDrive\Belgeler\DSA210_Project\data\colombia_weather.xlsx", "Colombia")
indonesia_climate = clean_climate(r"C:\Users\hp\OneDrive\Belgeler\DSA210_Project\data\indonesia_weather.xlsx", "Indonesia")

climate_all = pd.concat([brazil_climate, colombia_climate, indonesia_climate], ignore_index=True)
panel = (
    panel_base
    .merge(prices_all, on=["date","country"], how="left")
    .merge(climate_all, on=["date","country"], how="left")
    .merge(stores_monthly, on=["date","country"], how="left")
)


In [321]:
panel = panel[panel["date"] >= "2000-01-01"].copy()

panel["store_count"] = panel["store_count"].fillna(method="ffill")


  panel["store_count"] = panel["store_count"].fillna(method="ffill")


In [325]:
# Fill store_count NA by country constant values
store_values = {
    "Brazil": 102,
    "Colombia": 11,
    "Indonesia": 268
}

panel["store_count"] = panel.apply(
    lambda row: store_values[row["country"]] if pd.isna(row["store_count"]) else row["store_count"],
    axis=1
)
panel

Unnamed: 0,date,country,production_tons,coffee_price,avg_temp,rainfall,store_count
468,2000-01-01,Brazil,1903562.00,123.809524,26.05,235.00,102.0
469,2000-02-01,Brazil,1903562.00,111.148571,25.98,225.22,102.0
470,2000-03-01,Brazil,1903562.00,111.482381,25.85,240.36,102.0
471,2000-04-01,Brazil,1903562.00,101.897895,25.55,188.05,102.0
472,2000-05-01,Brazil,1903562.00,98.093182,24.65,135.21,102.0
...,...,...,...,...,...,...,...
2230,2022-09-01,Indonesia,774960.53,111.358182,25.82,165.95,268.0
2231,2022-10-01,Indonesia,774960.53,103.011905,26.14,210.18,268.0
2232,2022-11-01,Indonesia,774960.53,92.595000,26.17,254.66,268.0
2233,2022-12-01,Indonesia,774960.53,92.435000,26.02,296.44,268.0
