In [2]:
import requests
import pandas as pd
from IPython.display import display
import json

def market_fiyat_api(arama_terimi, lat=39.97041, lng=32.85647, distance=5000):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
        'Content-Type': 'application/json',
        'Accept': 'application/json',
        'Accept-Encoding': 'gzip, deflate',
        'Origin': 'https://marketfiyati.org.tr',
        'Referer': 'https://marketfiyati.org.tr/',
        'Cache-Control': 'no-cache',
        'Pragma': 'no-cache',
        'Connection': 'keep-alive'
    }
    
    try:
        # Nearest API isteÄŸi
        nearest_url = 'https://api.marketfiyati.org.tr/api/v2/nearest'
        nearest_data = {
            'latitude': str(lat),
            'longitude': str(lng),
            'distance': distance
        }
        
        requests.packages.urllib3.disable_warnings()
        
        nearest_response = requests.post(
            nearest_url,
            json=nearest_data,
            headers=headers,
            verify=False
        )
        
        nearest_markets = nearest_response.json()
        depots = [market['id'] for market in nearest_markets]
        
        # Search API isteÄŸi
        search_url = 'https://api.marketfiyati.org.tr/api/v2/search'
        search_data = {
            'keywords': arama_terimi,
            'pages': 0,
            'size': 100,
            'depots': depots,
            'latitude': str(lat),
            'longitude': str(lng),
            'distance': distance
        }
        
        search_response = requests.post(
            search_url,
            json=search_data,
            headers=headers,
            verify=False
        )
        
        results = search_response.json()
        
        # Verileri dÃ¼zenle
        rows = []
        for item in results['content']:
            for depot_info in item['productDepotInfoList']:
                row = {
                    'name': item['title'],
                    'brand': item.get('brand', ''),
                    'marketName': depot_info['marketAdi'],
                    'depotName': depot_info['depotName'],
                    'price': depot_info['price'],
                    'updateDate': depot_info['indexTime'],
                    'marketDistance': depot_info.get('distance', 0)
                }
                rows.append(row)
        
        df = pd.DataFrame(rows)
        
        if not df.empty:
            # Tarihi dÃ¼zenle
            df['updateDate'] = pd.to_datetime(df['updateDate'], format='%d.%m.%Y %H:%M')
            df['updateDate'] = df['updateDate'].dt.strftime('%Y-%m-%d %H:%M')
            
            # FiyatlarÄ± dÃ¼zenle
            df['price'] = pd.to_numeric(df['price'], errors='coerce')
            
            # Mesafeyi dÃ¼zenle
            if 'marketDistance' in df.columns:
                df['marketDistance'] = df['marketDistance'].round(2)
                df = df.rename(columns={'marketDistance': 'mesafe_km'})
            
            # SÄ±ralama
            df = df.sort_values('price')
            
            return df
            
        return pd.DataFrame()
        
    except Exception as e:
        print(f"Hata oluÅŸtu: {str(e)}")
        return pd.DataFrame()



In [4]:
ilce_df = pd.read_csv('ilce.csv',index_col=0)
ilce_df

Unnamed: 0,il_plaka,ilce_id,ilce_adi,lat,lon,northeast_lat,northeast_lon,southwest_lat,southwest_lon,Ä°l
0,1,1,ALADAÄž(KARSANTI),37.546379,35.402962,37.555225,35.418969,37.537532,35.386955,Adana
1,1,2,CEYHAN,37.031700,35.822750,37.055723,35.850071,37.006241,35.795967,Adana
2,1,3,Ã‡UKUROVA,37.000000,35.321333,37.072004,35.461995,36.935523,35.174706,Adana
3,1,4,FEKE,37.819918,35.912484,37.824131,35.922007,37.811669,35.903793,Adana
4,1,5,Ä°MAMOÄžLU,37.258751,35.672840,37.272539,35.675316,37.245506,35.646995,Adana
...,...,...,...,...,...,...,...,...,...,...
952,81,953,DÃœZCE,40.843849,31.156540,40.891762,31.195807,40.824717,31.106662,DÃ¼zce
953,81,954,GÃ–LYAKA,40.776579,30.995727,40.783131,31.008601,40.770669,30.989599,DÃ¼zce
954,81,955,GÃœMÃœÅžOVA,40.850000,30.933333,40.853631,30.949511,40.841169,30.930489,DÃ¼zce
955,81,956,KAYNAÅžLI,40.777337,31.303267,40.781931,31.314301,40.769469,31.295299,DÃ¼zce


In [5]:
il_df = pd.read_csv('il.csv')
ilce_df = pd.read_csv('ilce.csv')

# Ä°l merkezlerini al
il_merkezleri = il_df[['plaka', 'il_adi', 'lat', 'lon']].copy()
il_merkezleri = il_merkezleri.rename(columns={
    'plaka': 'il_plaka',
    'il_adi': 'ilce_adi',
    'lon': 'lng'
})

# Her ilden 2 ilÃ§e seÃ§
secili_ilceler = []
import numpy as np
np.random.seed(42)  # Tekrarlanabilirlik iÃ§in

for il_plaka in sorted(ilce_df['il_plaka'].unique()):
    # Ä°l merkezi olmayan ilÃ§eleri al
    il_ilceler = ilce_df[
        (ilce_df['il_plaka'] == il_plaka) & 
        (ilce_df['ilce_adi'] != ilce_df['Ä°l'])  # Ä°l merkezi olmayanlar
    ].copy()
    
    if len(il_ilceler) >= 2:
        # 2 ilÃ§e seÃ§
        secilen = il_ilceler.sample(n=2, random_state=42)
        secili_ilceler.append(secilen[['il_plaka', 'ilce_adi', 'lat', 'lon', 'Ä°l']].rename(columns={'lon': 'lng', 'Ä°l': 'il_adi'}))
    elif len(il_ilceler) == 1:
        # Sadece 1 ilÃ§e varsa onu seÃ§
        secili_ilceler.append(il_ilceler[['il_plaka', 'ilce_adi', 'lat', 'lon', 'Ä°l']].rename(columns={'lon': 'lng', 'Ä°l': 'il_adi'}))

# Ä°l merkezleri ve seÃ§ili ilÃ§eleri birleÅŸtir
secili_merkezler = pd.concat([il_merkezleri] + secili_ilceler, ignore_index=True)

# Ä°statistikler
print("\nSeÃ§ilen Merkez Ä°statistikleri:")
print(f"Toplam il merkezi sayÄ±sÄ±: {len(il_merkezleri)}")
print(f"Toplam ilÃ§e sayÄ±sÄ±: {len(secili_merkezler) - len(il_merkezleri)}")
print(f"Toplam merkez sayÄ±sÄ±: {len(secili_merkezler)}")

# Ä°l bazÄ±nda daÄŸÄ±lÄ±mÄ± kontrol et
il_dagilimi = secili_merkezler.groupby('il_plaka').size()
print("\nÄ°l baÅŸÄ±na dÃ¼ÅŸen merkez sayÄ±sÄ±:")
print(il_dagilimi.value_counts().sort_index())

# Her il iÃ§in seÃ§ilen merkezleri gÃ¶ster
print("\nÄ°l bazÄ±nda seÃ§ilen merkezler:")
for il_plaka in sorted(secili_merkezler['il_plaka'].unique()):
    il_merkezler = secili_merkezler[secili_merkezler['il_plaka'] == il_plaka]
    il_adi = il_merkezler['il_adi'].iloc[0] if 'il_adi' in il_merkezler.columns else il_merkezler['ilce_adi'].iloc[0]
    print(f"\nÄ°l: {il_adi} (Plaka: {il_plaka})")
    print(f"SeÃ§ilen merkezler: {', '.join(il_merkezler['ilce_adi'].tolist())}")
    


SeÃ§ilen Merkez Ä°statistikleri:
Toplam il merkezi sayÄ±sÄ±: 81
Toplam ilÃ§e sayÄ±sÄ±: 162
Toplam merkez sayÄ±sÄ±: 243

Ä°l baÅŸÄ±na dÃ¼ÅŸen merkez sayÄ±sÄ±:
3    81
Name: count, dtype: int64

Ä°l bazÄ±nda seÃ§ilen merkezler:

Ä°l: nan (Plaka: 1)
SeÃ§ilen merkezler: ADANA, SAÄ°MBEYLÄ°, SEYHAN

Ä°l: nan (Plaka: 2)
SeÃ§ilen merkezler: ADIYAMAN, SÄ°NCÄ°K, BESNÄ°

Ä°l: nan (Plaka: 3)
SeÃ§ilen merkezler: AFYONKARAHÄ°SAR, AFYON, BAÅžMAKÃ‡I

Ä°l: nan (Plaka: 4)
SeÃ§ilen merkezler: AÄžRI, DÄ°YADÄ°N, PATNOS

Ä°l: nan (Plaka: 5)
SeÃ§ilen merkezler: AMASYA, AMASYA, GÃ–YNÃœCEK

Ä°l: nan (Plaka: 6)
SeÃ§ilen merkezler: ANKARA, ELMADAÄž, KEÃ‡Ä°Ã–REN

Ä°l: nan (Plaka: 7)
SeÃ§ilen merkezler: ANTALYA, AKSEKÄ°, ELMALI

Ä°l: nan (Plaka: 8)
SeÃ§ilen merkezler: ARTVÄ°N, ARHAVÄ°, MURGUL(GÃ–KTAÅž)

Ä°l: nan (Plaka: 9)
SeÃ§ilen merkezler: AYDIN, AYDIN, BOZDOÄžAN

Ä°l: nan (Plaka: 10)
SeÃ§ilen merkezler: BALIKESÄ°R, AYVALIK, BURHANÄ°YE

Ä°l: nan (Plaka: 11)
SeÃ§ilen merkezler: BÄ°LECÄ°K, BOZÃœYÃœK, PAZARYERÄ°


In [7]:
domates_df=pd.DataFrame()
for i in range(len(secili_merkezler)):
    a=market_fiyat_api("domates",lat=secili_merkezler["lat"].iloc[i],lng=secili_merkezler["lng"].iloc[i])
    a["taranan_merkez"]=secili_merkezler["ilce_adi"].iloc[i]
    a["taranan_lat"]=secili_merkezler["lat"].iloc[i]
    a["taranan_lng"]=secili_merkezler["lng"].iloc[i]
    a["il_plaka"]=secili_merkezler["il_plaka"].iloc[i]
    ilce=secili_merkezler["ilce_adi"].iloc[i]
    domates_df=pd.concat([domates_df,a],axis=0)
    print(f"{ilce} Ã§ekildi")

ADANA Ã§ekildi
ADIYAMAN Ã§ekildi
AFYONKARAHÄ°SAR Ã§ekildi
AÄžRI Ã§ekildi
AMASYA Ã§ekildi
ANKARA Ã§ekildi
ANTALYA Ã§ekildi
ARTVÄ°N Ã§ekildi
AYDIN Ã§ekildi
BALIKESÄ°R Ã§ekildi
BÄ°LECÄ°K Ã§ekildi
BÄ°NGÃ–L Ã§ekildi
BÄ°TLÄ°S Ã§ekildi
BOLU Ã§ekildi
BURDUR Ã§ekildi
BURSA Ã§ekildi
Ã‡ANAKKALE Ã§ekildi
Ã‡ANKIRI Ã§ekildi
Ã‡ORUM Ã§ekildi
DENÄ°ZLÄ° Ã§ekildi
DÄ°YARBAKIR Ã§ekildi
EDÄ°RNE Ã§ekildi
ELAZIÄž Ã§ekildi
ERZÄ°NCAN Ã§ekildi
ERZURUM Ã§ekildi
ESKÄ°ÅžEHÄ°R Ã§ekildi
GAZÄ°ANTEP Ã§ekildi
GÄ°RESUN Ã§ekildi
GÃœMÃœÅžHANE Ã§ekildi
HAKKARÄ° Ã§ekildi
HATAY Ã§ekildi
ISPARTA Ã§ekildi
MERSÄ°N(Ä°Ã‡EL) Ã§ekildi
Ä°STANBUL Ã§ekildi
Ä°ZMÄ°R Ã§ekildi
KARS Ã§ekildi
KASTAMONU Ã§ekildi
KAYSERÄ° Ã§ekildi
KIRKLARELÄ° Ã§ekildi
KIRÅžEHÄ°R Ã§ekildi
KOCAELÄ° Ã§ekildi
KONYA Ã§ekildi
KÃœTAHYA Ã§ekildi
MALATYA Ã§ekildi
MANÄ°SA Ã§ekildi
KAHRAMANMARAÅž Ã§ekildi
MARDÄ°N Ã§ekildi
MUÄžLA Ã§ekildi
MUÅž Ã§ekildi
NEVÅžEHÄ°R Ã§ekildi
NÄ°ÄžDE Ã§ekildi
ORDU Ã§ekildi
RÄ°ZE Ã§ekildi
SAKARYA Ã§ekildi
SAMSUN Ã§ekildi
SÄ°Ä°RT Ã§ekildi
SÄ°NO

In [10]:
domates_df=domates_df[domates_df["brand"]=="MarkasÄ±z"]

In [11]:
domates_df[domates_df["brand"]=="MarkasÄ±z"].groupby("taranan_merkez")["price"].mean().sort_values()

taranan_merkez
Ä°SLAHÄ°YE      40.644118
KÄ°LÄ°S         41.435294
POLATELÄ°      42.908333
Ä°ZMÄ°R         43.108824
ÅžANLIURFA     43.175000
                ...    
ZONGULDAK     49.757895
KIRKLARELÄ°    49.778571
ÅžÄ°LE          49.847727
BOZÃœYÃœK       50.010000
DÃœZCE         50.527778
Name: price, Length: 229, dtype: float64

In [21]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
from matplotlib.cm import ScalarMappable

def create_choropleth_map(df, shp_path):
    """
    TÃ¼rkiye il sÄ±nÄ±rlarÄ± Ã¼zerinde fiyat daÄŸÄ±lÄ±mÄ±nÄ± gÃ¶steren Ä±sÄ± haritasÄ± (matplotlib ile)
    """
    # Shapefile'Ä± yÃ¼kle
    turkey_geo = gpd.read_file(shp_path)
    
    # Ä°l bazlÄ± medyan fiyatlarÄ± hesapla
    il_fiyatlari = df.groupby("il_plaka")["price"].mean()
    
    # GeoDataFrame'e fiyatlarÄ± ekle
    turkey_geo['fiyat'] = turkey_geo['plaka'].map(il_fiyatlari)
    
    # Plot oluÅŸtur
    fig, ax = plt.subplots(1, 1, figsize=(12, 10))
    
    # TÃ¼rkiye haritasÄ±nÄ± Ã§iz
    turkey_geo.boundary.plot(ax=ax, linewidth=1, color="black")
    
    # Fiyatlar ile il sÄ±nÄ±rlarÄ±nÄ± doldur
    norm = Normalize(vmin=il_fiyatlari.min(), vmax=il_fiyatlari.max())
    sm = ScalarMappable(cmap="YlOrRd", norm=norm)
    turkey_geo.plot(column='fiyat', ax=ax, legend=True, cmap="YlOrRd", 
                    legend_kwds={'label': "Dana KÄ±yma FiyatÄ± (TL)", 'orientation': "horizontal"})
    
    # BaÅŸlÄ±k ve etiketler
    ax.set_title("Ä°l BazlÄ± Dana KÄ±yma Ortalama FiyatÄ± DaÄŸÄ±lÄ±mÄ±", fontsize=16)


    # Grafik etrafÄ±ndaki beyaz boÅŸluklarÄ± kaldÄ±rmak iÃ§in
    plt.subplots_adjust(left=0, right=1, top=1, bottom=0)

    # HaritayÄ± PNG olarak kaydet
    plt.savefig('domates_fiyat_haritasi_matplotlib.png', format='png', dpi=300, bbox_inches='tight')
    plt.close()

    print("Harita 'domates_fiyat_haritasi_matplotlib.png' olarak kaydedildi.")

# Shapefile yolu
shp_path = "C:/Users/Bora/Documents/GitHub/web-tufe/updated_turkey_geo.shp"  # shapefile'Ä±nÄ±zÄ±n tam yolunu belirtin

# HaritayÄ± oluÅŸtur
create_choropleth_map(domates_df, shp_path)

# Ä°statistikleri gÃ¶ster
il_fiyatlari = domates_df.groupby("il_plaka")["price"].median()
print("\nFiyat Ä°statistikleri:")
print(f"En dÃ¼ÅŸÃ¼k il ortalamasÄ±: {il_fiyatlari.min():.2f} TL")
print(f"En yÃ¼ksek il ortalamasÄ±: {il_fiyatlari.max():.2f} TL")
print(f"TÃ¼rkiye ortalamasÄ±: {il_fiyatlari.mean():.2f} TL")

# En ucuz ve en pahalÄ± 5 il
print("\nEn Ucuz 5 Ä°l:")
print(il_fiyatlari.sort_values().head())
print("\nEn PahalÄ± 5 Ä°l:")
print(il_fiyatlari.sort_values().tail())


Harita 'domates_fiyat_haritasi_matplotlib.png' olarak kaydedildi.

Fiyat Ä°statistikleri:
En dÃ¼ÅŸÃ¼k il ortalamasÄ±: 37.00 TL
En yÃ¼ksek il ortalamasÄ±: 45.90 TL
TÃ¼rkiye ortalamasÄ±: 42.76 TL

En Ucuz 5 Ä°l:
il_plaka
79    37.000
27    37.000
75    39.900
69    39.900
46    39.925
Name: price, dtype: float64

En PahalÄ± 5 Ä°l:
il_plaka
6     45.9
34    45.9
26    45.9
47    45.9
40    45.9
Name: price, dtype: float64


In [17]:
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point


turkey_geo = gpd.read_file(shp_path)



# Create a GeoDataFrame for il_df with lat and lon
il_df['geometry'] = il_df.apply(lambda row: Point(row['lon'], row['lat']), axis=1)
il_gdf = gpd.GeoDataFrame(il_df, geometry='geometry', crs=turkey_geo.crs)

# Ensure CRS consistency
il_gdf = il_gdf.set_crs(turkey_geo.crs, allow_override=True)

# Perform spatial join with proximity-based matching using 'dwithin'
joined = gpd.sjoin(turkey_geo, il_gdf, how="left", predicate='dwithin', distance=0.01)

# Drop duplicates to avoid multiple plaka values for the same polygon
joined = joined.drop_duplicates(subset=['geometry'])

# Check the result to ensure proper matching
print(joined[['plaka', 'geometry']].head())

# Update the 'plaka' in turkey_geo with the joined 'plaka' from il_df
turkey_geo['plaka'] = joined['plaka']

# Save the updated shapefile if needed
turkey_geo.to_file("updated_turkey_geo.shp")


   plaka                                           geometry
0      1  MULTIPOLYGON (((35.41434 36.5882, 35.41347 36....
1      2  POLYGON ((38.1033 37.9024, 38.11007 37.90416, ...
2      3  POLYGON ((30.19456 37.87817, 30.20171 37.86722...
3      4  POLYGON ((43.10198 39.32246, 43.0951 39.32087,...
4     68  POLYGON ((33.24951 38.26498, 33.26122 38.27609...


In [None]:
domates_df=pd.DataFrame()
for i in range(len(secili_merkezler)):
    a=market_fiyat_api("domates",lat=secili_merkezler["lat"].iloc[i],lng=secili_merkezler["lng"].iloc[i])
    a["taranan_merkez"]=secili_merkezler["ilce_adi"].iloc[i]
    a["taranan_lat"]=secili_merkezler["lat"].iloc[i]
    a["taranan_lng"]=secili_merkezler["lng"].iloc[i]
    a["il_plaka"]=secili_merkezler["il_plaka"].iloc[i]
    ilce=secili_merkezler["ilce_adi"].iloc[i]
    domates_df=pd.concat([domates_df,a],axis=0)
    print(f"{ilce} Ã§ekildi")

In [32]:
pd.read_csv("aÄŸÄ±rlÄ±klar.csv")["ÃœrÃ¼n"].to_csv("maddeler.csv")

In [4]:
sepet=pd.read_csv("sepet.csv").iloc[:,[0,1]]
sepet.columns=["Madde","ÃœrÃ¼n"]
sepet.to_csv("eskisepet.csv")

In [8]:
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
import torch
from sklearn.model_selection import train_test_split

# BERT Tokenizer'Ä± yÃ¼kleyelim
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Veriyi BERT'e uygun hale getirelim
eskisepet_new_df = sepet[['ÃœrÃ¼n', 'Madde']]

# ÃœrÃ¼n isimlerini tokenize edelim
def tokenize_function(examples):
    return tokenizer(examples['ÃœrÃ¼n'], padding="max_length", truncation=True, max_length=64)

# DataFrame'i Dataset formatÄ±na dÃ¶nÃ¼ÅŸtÃ¼rme
dataset = Dataset.from_pandas(eskisepet_new_df)

# Tokenize iÅŸlemi
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Etiketleri sayÄ±sal verilere dÃ¶nÃ¼ÅŸtÃ¼rme
label2id = {label: i for i, label in enumerate(eskisepet_new_df['Madde'].unique())}

# Madde etiketini doÄŸru ÅŸekilde almak iÃ§in gÃ¼ncelleme
# Madde etiketini doÄŸru ÅŸekilde almak iÃ§in gÃ¼ncelleme
tokenized_datasets = tokenized_datasets.map(
    lambda x: {'labels': [label2id[label] for label in x['Madde']]}, 
    batched=True
)



# EÄŸitim ve test verilerini ayÄ±ralÄ±m
train_dataset, test_dataset = tokenized_datasets.train_test_split(test_size=0.2).values()

# Modeli yÃ¼kleyelim (BERT)
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=len(eskisepet_new_df['Madde'].unique()))

# EÄŸitim argÃ¼manlarÄ±nÄ± belirleyelim
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
)

# Trainer'Ä± oluÅŸturup eÄŸitelim
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset
)

# Modeli eÄŸitelim
trainer.train()


Map:   0%|          | 0/20419 [00:00<?, ? examples/s]

Map:   0%|          | 0/20419 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss
1,1.2199,0.982766


RuntimeError: [enforce fail at inline_container.cc:603] . unexpected pos 527327360 vs 527327252

In [None]:
# BERT modelini kaydedelim
model.save_pretrained('./bert_model')
tokenizer.save_pretrained('./bert_model')
