# zip export

In [3]:
import zipfile
import os
import shutil

# Name fragments used to locate each inner archive inside the outer zip
# (matched case-insensitively as substrings of the archive member names).
# The 1-based position in this list becomes the name of the output
# sub-folder (data/1, data/2, ...), so the order is significant.
DATASET_ORDER = [
    "WLAN (WiFi) RSS database for fingerprinting positioning",
    "WiFi RSS measurements in Tampere University multi-building campus 2017",
    "Wi-Fi Fingerprinting dataset with multiple simultaneous interfaces",
    "UJIndoorLoc",
    "DSI_dataset",
    "Crowdsourced WiFi database and benchmark software for indoor positioning"
]

def flatten_folder(folder_path):
    """Hoist the contents of a lone wrapper directory up into *folder_path*.

    If *folder_path* contains exactly one entry and that entry is a
    directory, every item inside it is moved up one level and the emptied
    wrapper is removed. In every other case the folder is left untouched.

    A child that carries the same name as the wrapper itself (for example
    ``data/1/foo/foo``) would collide with the wrapper when moved up, so in
    that case the wrapper is first renamed to a name none of its children
    use.

    Args:
        folder_path: Directory to flatten in place.
    """
    entries = os.listdir(folder_path)
    if len(entries) != 1:
        return

    wrapper_name = entries[0]
    wrapper = os.path.join(folder_path, wrapper_name)
    if not os.path.isdir(wrapper):
        return

    children = os.listdir(wrapper)

    # normcase folds case on Windows, where "Foo" and "foo" are the same file.
    taken = {os.path.normcase(child) for child in children}
    if os.path.normcase(wrapper_name) in taken:
        # Park the wrapper under a free name so the clashing child can take
        # the wrapper's old place without shutil.move raising.
        counter = 0
        staging_name = f"{wrapper_name}.flatten{counter}"
        while os.path.normcase(staging_name) in taken:
            counter += 1
            staging_name = f"{wrapper_name}.flatten{counter}"
        staging = os.path.join(folder_path, staging_name)
        os.rename(wrapper, staging)
        wrapper = staging

    for item in children:
        shutil.move(os.path.join(wrapper, item), os.path.join(folder_path, item))
    shutil.rmtree(wrapper)
    print(f"   ‚îî‚îÄ Flattened folder structure inside '{folder_path}'")

def extract_datasets(outer_zip="datasets.zip", output_dir="data"):
    """Unpack the nested dataset archives into numbered sub-folders.

    The outer archive is scanned for members ending in ``.zip``. For each
    entry of ``DATASET_ORDER`` the first member whose name contains that
    entry (case-insensitive) is extracted into ``<output_dir>/<position>``,
    where the position is 1-based, and the result is passed through
    ``flatten_folder``. Entries without a matching member are reported and
    skipped.

    Args:
        outer_zip: Path of the archive that holds the per-dataset zip files.
        output_dir: Directory that receives the numbered sub-folders.
    """
    os.makedirs(output_dir, exist_ok=True)

    with zipfile.ZipFile(outer_zip, 'r') as archive:
        nested = [name for name in archive.namelist() if name.lower().endswith(".zip")]
        print(f"Found {len(nested)} inner zip files.")

        for number, wanted in enumerate(DATASET_ORDER, start=1):
            needle = wanted.lower()

            # First inner archive whose name contains the wanted fragment.
            hit = None
            for candidate in nested:
                if needle in candidate.lower():
                    hit = candidate
                    break

            if not hit:
                print(f"‚ö†Ô∏è Could not find dataset for pattern: {wanted}")
                continue

            destination = os.path.join(output_dir, str(number))
            os.makedirs(destination, exist_ok=True)

            print(f"[{number}] Extracting '{hit}' ‚Üí {destination}")
            # The inner archive is read straight from the outer one without
            # being written to disk first.
            with archive.open(hit) as stream, zipfile.ZipFile(stream) as inner:
                inner.extractall(destination)

            # Remove a redundant single top-level directory, if any.
            flatten_folder(destination)

    print("‚úÖ All datasets extracted and flattened successfully.")

# Run the extraction with the default archive name and output directory
# when this code is executed as a script rather than imported.
if __name__ == "__main__":
    extract_datasets()


Found 6 inner zip files.
[1] Extracting 'datasets/WLAN (WiFi) RSS database for fingerprinting positioning.zip' ‚Üí data\1
   ‚îî‚îÄ Flattened folder structure inside 'data\1'
[2] Extracting 'datasets/WiFi RSS measurements in Tampere University multi-building campus 2017 - Zenodo 5174851.zip' ‚Üí data\2
   ‚îî‚îÄ Flattened folder structure inside 'data\2'
[3] Extracting 'datasets/Wi-Fi Fingerprinting dataset with multiple simultaneous interfaces.zip' ‚Üí data\3
   ‚îî‚îÄ Flattened folder structure inside 'data\3'
[4] Extracting 'datasets/UJIndoorLoc.zip' ‚Üí data\4
   ‚îî‚îÄ Flattened folder structure inside 'data\4'
[5] Extracting 'datasets/DSI_dataset.zip' ‚Üí data\5
[6] Extracting 'datasets/Crowdsourced WiFi database and benchmark software for indoor positioning.zip' ‚Üí data\6
‚úÖ All datasets extracted and flattened successfully.


#### Converts RSS and coordinate information from different WiFi positioning (fingerprinting) datasets to JSON format.

In [5]:
import os
import time
import sklearn
import numpy as np
import pandas as pd
from collections import defaultdict
from tqdm import tqdm
import json


def find_the_way(path, file_format, con=""):
    """Recursively list files under *path* whose names contain two markers.

    A file is selected when its name contains *file_format* and also
    contains *con*. Both are plain substring tests, not suffix tests, and
    the default empty *con* matches every name.

    The result is sorted so that it does not depend on the arbitrary order
    in which the file system lists directory entries.

    Args:
        path: Root directory to walk.
        file_format: Substring the file name must contain (e.g. ``".csv"``).
        con: Additional substring the file name must contain.

    Returns:
        Sorted list of matching paths, each built as ``os.path.join(dir, name)``.
    """
    matches = [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(path)
        for name in names
        if file_format in name and con in name
    ]
    matches.sort()
    return matches




# Every CSV found under data/4 is converted into two JSON files:
#   <name>._rss.json : {row index: {column index: RSS value}}
#   <name>._coo.json : {row index: [longitude, latitude]}
files_add = find_the_way("./data/4/", ".csv")
files_add

# Metadata columns; everything that remains after dropping them is treated
# as one RSS reading per column.
cols = ['LONGITUDE', 'LATITUDE', 'FLOOR', 'BUILDINGID', 'SPACEID',
        'RELATIVEPOSITION', 'USERID', 'PHONEID', 'TIMESTAMP']

for file in files_add:
    j_rssi = {}
    j_coo = {}

    rssis = pd.read_csv(file)
    lon = rssis['LONGITUDE'].values
    lat = rssis['LATITUDE'].values
    rssis = rssis.drop(columns=cols)

    for ii, row in enumerate(rssis.values):
        # Readings equal to 100 are skipped.
        # NOTE(review): presumably 100 is the dataset's "not detected"
        # marker - confirm against the dataset documentation.
        detected = {jj: int(value) for jj, value in enumerate(row) if value != 100}
        if detected:
            # Rows without any kept reading get no entry in either file, so
            # both JSON files always share the same set of keys.
            j_rssi[ii] = detected
            j_coo[ii] = [lon[ii], lat[ii]]

    # Swap only the trailing "csv" of the path; a plain str.replace would
    # also rewrite any "csv" that happens to appear in a directory name.
    head, _, tail = file.rpartition("csv")

    with open(f"{head}_rss.json{tail}", 'w', encoding='utf-8') as f:
        json.dump(j_rssi, f, ensure_ascii=False, indent=4)

    with open(f"{head}_coo.json{tail}", 'w', encoding='utf-8') as f:
        json.dump(j_coo, f, ensure_ascii=False, indent=4)

    print(f"‚úÖ  {file}")

‚úÖ  ./data/4/trainingData.csv
‚úÖ  ./data/4/validationData.csv


#### Calculates the distances between different measurement points in WiFi positioning data (based on coordinates) and saves them to CSV files.

In [6]:
import json
import itertools
import math
import csv
from tqdm import tqdm
import os

In [7]:
def is_global_coordinate(coord):
    """Guess whether a coordinate pair is geographic (lat/lon) or local (x/y).

    The pair is unpacked as ``(lat, lon)``.

    - Latitude within [-90, 90] and longitude within [-180, 180]: global.
    - Both values above 90: local x/y.
    - Anything else is ambiguous: a warning is printed and the pair is
      treated as local.

    Returns:
        True for global coordinates, False for local ones.
    """
    latitude, longitude = coord

    fits_on_globe = abs(latitude) <= 90 and abs(longitude) <= 180
    if fits_on_globe:
        return True  # geographic coordinate (lat/lon)

    clearly_local = latitude > 90 and longitude > 90
    if not clearly_local:
        # Ambiguous input: warn, then fall through to the "local" default.
        print(f"UYARI: Koordinatlar tam tespit edilemedi, varsayƒ±lan olarak yerel kabul ediliyor! {coord}")

    return False  # local coordinate (x/y)


def haversine_distance(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in km between two lat/lon points.

    All four arguments are in degrees. The haversine formula is evaluated
    with an Earth radius of 6371 km.
    """
    earth_radius_km = 6371

    # Work in radians from here on.
    phi1 = math.radians(lat1)
    lam1 = math.radians(lon1)
    phi2 = math.radians(lat2)
    lam2 = math.radians(lon2)

    sin_half_dphi = math.sin((phi2 - phi1) / 2)
    sin_half_dlam = math.sin((lam2 - lam1) / 2)

    # Haversine term and the central angle derived from it.
    a = sin_half_dphi**2 + math.cos(phi1) * math.cos(phi2) * sin_half_dlam**2
    central_angle = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    return earth_radius_km * central_angle

def euclidean_distance(x1, y1, x2, y2):
    """Return the straight-line distance between two points in local x/y coordinates."""
    dx = x2 - x1
    dy = y2 - y1
    return math.sqrt(dx ** 2 + dy ** 2)

def calculate_distance(coord1, coord2):
    """Pick a distance formula from the kind of coordinates supplied.

    Each pair is classified with ``is_global_coordinate``:

    - both global: haversine distance, tagged ``"Haversine"``
    - both local: Euclidean distance, tagged ``"Euclidean"``
    - one of each: no distance is computed and the value is ``None``

    Returns:
        A ``(distance, method_name)`` tuple.
    """
    kinds = (is_global_coordinate(coord1), is_global_coordinate(coord2))

    if all(kinds):
        return haversine_distance(*coord1, *coord2), "Haversine"

    if not any(kinds):
        return euclidean_distance(*coord1, *coord2), "Euclidean"

    # Mixed input: hand back None instead of raising.
    return None, "Ge√ßersiz"


def intersection_count(list1, list2):
    """Return how many distinct elements the two lists have in common."""
    shared = set(list1).intersection(list2)
    return len(shared)


In [8]:
# Map every RSS file to the coordinate file that belongs to it.
# Pairing is done by file name ("<base>_rss.json" -> "<base>_coo.json")
# instead of by list position, so an extra JSON file in the folder or a
# different directory listing order cannot pair the wrong files.
prelist = find_the_way("./data/4", ".json", "")
available = set(prelist)
files_add = {}
for rss_path in prelist:
    if not rss_path.endswith("_rss.json"):
        continue
    coo_path = rss_path[:-len("_rss.json")] + "_coo.json"
    if coo_path in available:
        files_add[rss_path] = coo_path
    else:
        print(f"No coordinate file found for {rss_path}; skipping.")

for i in files_add:
    print(i)

./data/4\trainingData._rss.json
./data/4\validationData._rss.json


In [9]:
# For every (RSS file, coordinate file) pair, write a CSV listing the
# coordinate-based distance of each pair of measurement points that share
# at least one observed access point.
for file in files_add:
    # The JSON files were written as UTF-8, so read them the same way.
    with open(file, "r", encoding="utf-8") as f:
        rss = json.load(f)

    with open(files_add[file], "r", encoding="utf-8") as f:
        data = json.load(f)

    # Key: point id, value: coordinate tuple as stored in the JSON file.
    points = {int(k): tuple(v) for k, v in data.items()}

    # Build each point's access-point set once. The pair loop below is
    # quadratic in the number of points, so rebuilding lists and sets for
    # both points on every iteration dominated the run time.
    ap_sets = {point_id: set(rss[str(point_id)]) for point_id in points}

    # Rows are written as soon as they are computed instead of collected.
    with open(file.replace(".json", "@.csv"), "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["id1", "id2", "estimated_distance", "method"])  # header row

        total = len(points) * (len(points) - 1) // 2  # number of unordered pairs

        for (i, point1), (j, point2) in tqdm(itertools.combinations(points.items(), 2), total=total):
            # Pairs without a common access point are left out of the file.
            if ap_sets[i].isdisjoint(ap_sets[j]):
                continue

            distance = euclidean_distance(point1[0], point1[1], point2[0], point2[1])
            writer.writerow([i, j, round(distance, 2), "euclidean_distance"])

    print(f"{file}  kaydedildi.")


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 197219730/197219730 [21:47<00:00, 150872.76it/s]


./data/4\trainingData._rss.json  kaydedildi.


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 616605/616605 [00:04<00:00, 125229.15it/s]

./data/4\validationData._rss.json  kaydedildi.





#### Calculates various distance metrics (e.g. correlation, Euclidean, cosine, Jaccard) that measure the similarity or difference between WiFi fingerprints, and saves them as extracted features in CSV files.

In [10]:
import csv
import numpy as np
import scipy.spatial.distance
from tqdm import tqdm

## 1.4 Feature Extraction (Intersection Based) - Simplified
def feature_extraction_file(data, name, fps):
    """Compute the feature row of every fingerprint pair and write a CSV.

    Rows are written as they are produced, so memory use does not grow with
    the number of pairs.

    Args:
        data: Iterable of ``[id1, id2, distance]`` items. The two ids are
            used as keys into *fps*; the third value is copied into the
            ``real`` column.
        name: Path of the CSV file to create (overwritten if it exists).
        fps: Mapping from point id to that point's fingerprint, in the form
            expected by ``feature_extraction``.
    """
    header = ["correlation",
              "chebyshev",
              "intersecting_mac",
              "euclidean",
              "cosine",
              "jensenshannon",
              "jaccard",
              "canberra",
              "minkowski",
              "real"]

    with open(name, "w", newline='') as f:
        writer = csv.writer(f)
        writer.writerow(header)

        for i in tqdm(data, position=0, leave=True):
            row = feature_extraction(fps[i[0]], fps[i[1]])
            row.append(i[2])  # target value goes last, under "real"
            writer.writerow(row)

def feature_extraction(fp1, fp2):
    """Build the distance-feature vector of two fingerprints.

    Only the keys present in both fingerprints are compared. The returned
    list holds, in this order: correlation, chebyshev, number of shared
    keys, euclidean, cosine, jensenshannon, jaccard, canberra, minkowski.
    If the fingerprints share no key, nine zeros are returned.

    Args:
        fp1: Mapping from access-point key to RSS value.
        fp2: Mapping from access-point key to RSS value.

    Returns:
        List of nine numbers.
    """
    shared = set(fp1.keys()) & set(fp2.keys())

    # Nothing in common: every feature, including the shared count, is 0.
    if not shared:
        return [0, 0, 0, 0, 0, 0, 0, 0, 0]

    shared_count = len(shared)

    # Both vectors are built by iterating the same set, so position k of
    # each refers to the same access point.
    values1 = [fp1[mac] for mac in shared]
    values2 = [fp2[mac] for mac in shared]

    def guarded(metric, a, b, fallback=0):
        """Evaluate ``metric(a, b)``; use *fallback* on error or a non-finite result."""
        try:
            value = metric(a, b)
            return value if np.isfinite(value) else fallback
        except (ValueError, ZeroDivisionError, RuntimeWarning):
            return fallback

    dist = scipy.spatial.distance
    vec1 = np.array(values1, dtype=float)
    vec2 = np.array(values2, dtype=float)

    # A standard deviation of 0 means the vector is constant.
    flat_vectors = int(np.std(vec1) == 0) + int(np.std(vec2) == 0)

    correlation = guarded(dist.correlation, vec1, vec2, 1.0)
    chebyshev = guarded(dist.chebyshev, vec1, vec2, 0)
    euclidean = guarded(dist.euclidean, vec1, vec2, 0)

    # Cosine is only evaluated when neither vector is constant.
    if flat_vectors == 2:
        cosine = 0  # both constant: treated as zero distance
    elif flat_vectors == 1:
        cosine = 1  # exactly one constant: treated as maximum distance
    else:
        cosine = guarded(dist.cosine, vec1, vec2, 1.0)

    jensenshannon = guarded(dist.jensenshannon, vec1, vec2, 0)
    canberra = guarded(dist.canberra, vec1, vec2, 0)
    minkowski = guarded(dist.minkowski, vec1, vec2, 0)

    # Jaccard works on binary vectors: 1 where the reading exceeds the threshold.
    threshold = -70
    bits1 = [1 if reading > threshold else 0 for reading in values1]
    bits2 = [1 if reading > threshold else 0 for reading in values2]
    has_strong1 = any(bits1)
    has_strong2 = any(bits2)

    if not has_strong1 and not has_strong2:
        jaccard = 0  # both all-zero: treated as identical
    elif not (has_strong1 and has_strong2):
        jaccard = 1  # exactly one all-zero: treated as maximum distance
    else:
        jaccard = guarded(dist.jaccard, bits1, bits2, 1.0)

    row = [correlation,
           chebyshev,
           shared_count,
           euclidean,
           cosine,
           jensenshannon,
           jaccard,
           canberra,
           minkowski]

    # A value that is not equal to itself is NaN; replace it with 0.
    return [value if value == value else 0 for value in row]

In [12]:
# Collect the pairwise-distance CSVs (file names containing "@.csv") under data/4.
prelist=find_the_way("./data/4/","@.csv","")
prelist

['./data/4/validationData._rss@.csv']

In [13]:
# For each pairwise-distance CSV: load the matching fingerprint JSON, read
# the id pairs with their distances, and write the feature file
# "<base>-distance.csv" next to it.
for p in prelist:

    print(p)

    fingerprint_path = p.replace("@.csv", ".json")
    feature_path = p.replace("@.csv", "-distance.csv")

    with open(fingerprint_path) as f:
        fps_train = json.load(f)

    with open(p) as f:
        train_h = csv.DictReader(f)
        # One [id1, id2, distance] entry per CSV row; the ids stay strings
        # because they are used as keys of the JSON fingerprint mapping.
        train_data = [
            [pair['id1'], pair['id2'], float(pair['estimated_distance'])]
            for pair in tqdm(train_h)
        ]
    print("Train Data loaded!!")
    feature_extraction_file(train_data, feature_path, fps_train)
    print("\n\n")
    print("*"*100)

./data/4/validationData._rss@.csv


174416it [00:00, 179518.82it/s]


Train Data loaded!!


  dist = 1.0 - uv / math.sqrt(uu * vv)
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 174416/174416 [01:10<00:00, 2479.74it/s]





****************************************************************************************************


### Wifi Distance Estimation Model - XGB

In [27]:
#!/usr/bin/env python
# coding: utf-8

import warnings
warnings.filterwarnings("ignore")

import os
import pickle
import numpy as np
import pandas as pd
from datetime import datetime

# Derin √ñƒürenme modeli i√ßin
from tensorflow.keras.models import load_model

def load_trained_model(model_name):
    """Load a trained model from the ``regression_models`` directory.

    ``"ANN"`` is read from ``<name>_model.h5`` with ``load_model``; every
    other name is unpickled from ``<name>_model.pkl``.

    Args:
        model_name: Short model identifier, e.g. ``"XGB"`` or ``"ANN"``.

    Returns:
        The loaded model, or ``None`` if anything went wrong (the error is
        printed, not raised).
    """
    try:
        is_keras = model_name == "ANN"
        extension = "h5" if is_keras else "pkl"
        model_path = f"regression_models/{model_name}_model.{extension}"

        if is_keras:
            model = load_model(model_path)
        else:
            # NOTE: unpickling can execute arbitrary code; only load model
            # files that come from a trusted source.
            with open(model_path, "rb") as handle:
                model = pickle.load(handle)

        print(f"‚úÖ {model_name} modeli ba≈üarƒ±yla y√ºklendi: {model_path}")
        return model
    except Exception as e:
        print(f"‚ùå Hata: {model_name} modeli y√ºklenemedi! - {e}")
        return None

def predict_and_save(model_name, data_path, output_folder="prediction_outputs"):
    """Run a saved model over a feature CSV and store the predictions.

    The CSV at *data_path* is loaded; if it has a ``real`` column, that
    column is taken as ground truth, removed from the model input, and MAE
    and RMSE are printed. The predictions are written next to the input
    file: a trailing ``._rss-distance.csv`` is replaced by
    ``_predictions.csv``; for any other file name ``_predictions.csv`` is
    appended to the name without its extension, so the input file is never
    overwritten.

    Args:
        model_name: Model identifier passed to ``load_trained_model``
            (e.g. ``"BR"``, ``"XGB"``, ``"ANN"``).
        data_path: Path of the CSV file to predict on.
        output_folder: Directory that is created if missing.
            NOTE(review): the result file is written beside *data_path*,
            not into this folder - confirm which location is intended.

    Returns:
        DataFrame with an ``estimated`` column (plus ``real``, ``error`` and
        ``absolute_error`` when ground truth is available), or ``None`` if
        the model or data could not be loaded or prediction failed.
    """

    # Create the output folder
    os.makedirs(output_folder, exist_ok=True)

    print(f"\n{'='*50}")
    print(f"üìä Model: {model_name}")
    print(f"üìÅ Veri: {data_path}")
    print(f"{'='*50}\n")

    # 1. Load the model
    model = load_trained_model(model_name)
    if model is None:
        return

    # 2. Load the data
    try:
        data_df = pd.read_csv(data_path)
        print(f"‚úÖ Veri ba≈üarƒ±yla y√ºklendi: {data_df.shape[0]} satƒ±r, {data_df.shape[1]} s√ºtun")
    except Exception as e:
        print(f"‚ùå Hata: Veri y√ºklenemedi! - {e}")
        return

    # 3. Split off the ground-truth column, if there is one
    has_real_values = 'real' in data_df.columns
    if has_real_values:
        X_data = data_df.drop('real', axis=1)
        real_values = data_df['real']
    else:
        X_data = data_df
        real_values = None

    # 4. Predict
    print("üîÆ Tahmin yapƒ±lƒ±yor...")
    try:
        if model_name == "ANN":
            # The ANN output is flattened to one value per row.
            predictions = model.predict(X_data).flatten()
        else:
            predictions = model.predict(X_data)
        print(f"‚úÖ Tahmin tamamlandƒ±! {len(predictions)} adet tahmin √ºretildi.")
    except Exception as e:
        print(f"‚ùå Hata: Tahmin yapƒ±lamadƒ±! - {e}")
        return

    # 5. Assemble the result table
    result_df = pd.DataFrame()

    if has_real_values:
        result_df['real'] = real_values
        result_df['estimated'] = predictions
        result_df['error'] = result_df['real'] - result_df['estimated']
        result_df['absolute_error'] = np.abs(result_df['error'])

        # Error statistics
        mae = np.mean(result_df['absolute_error'])
        rmse = np.sqrt(np.mean(result_df['error']**2))
        print(f"\nüìà Performans Metrikleri:")
        print(f"   MAE (Ortalama Mutlak Hata): {mae:.4f}")
        print(f"   RMSE (K√∂k Ortalama Kare Hata): {rmse:.4f}")
    else:
        result_df['estimated'] = predictions

    # 6. Save the results
    feature_suffix = "._rss-distance.csv"
    if data_path.endswith(feature_suffix):
        output_filename = data_path[:-len(feature_suffix)] + "_predictions.csv"
    else:
        # Without the expected suffix a plain replace() would leave the
        # path unchanged and the input CSV would be overwritten.
        stem, _extension = os.path.splitext(data_path)
        output_filename = stem + "_predictions.csv"

    result_df.to_csv(output_filename, index=False)
    print(f"\n‚úÖ Sonu√ßlar kaydedildi: {output_filename}")
    print(f"\n{'='*50}\n")

    return result_df



In [30]:
# Collect the feature files (file names containing "rss-distance.csv") under data/4.
files_add=find_the_way("./data/4/","rss-distance.csv")
files_add

['./data/4/trainingData._rss-distance.csv',
 './data/4/validationData._rss-distance.csv']

In [31]:
# Models to run on every feature file.
# "DTR", "KNN", "LR", "ANN" can be added here.
available_models = [ "XGB"]

for i in files_add:
    # Absolute directory of the feature file, handed over as output folder.
    folder_path = os.path.dirname(os.path.abspath(i))

    # File to predict on
    data_file = i

    # One prediction run per model
    for model_name in available_models:
        predict_and_save(model_name, data_file, folder_path)

    print("\nüéâ T√ºm tahminler tamamlandƒ±!")


üìä Model: XGB
üìÅ Veri: ./data/4/trainingData._rss-distance.csv

‚úÖ XGB modeli ba≈üarƒ±yla y√ºklendi: regression_models/XGB_model.pkl
‚úÖ Veri ba≈üarƒ±yla y√ºklendi: 67588605 satƒ±r, 10 s√ºtun
üîÆ Tahmin yapƒ±lƒ±yor...
‚úÖ Tahmin tamamlandƒ±! 67588605 adet tahmin √ºretildi.

üìà Performans Metrikleri:
   MAE (Ortalama Mutlak Hata): 35.7442
   RMSE (K√∂k Ortalama Kare Hata): 58.7216

‚úÖ Sonu√ßlar kaydedildi: ./data/4/trainingData_predictions.csv



üéâ T√ºm tahminler tamamlandƒ±!

üìä Model: XGB
üìÅ Veri: ./data/4/validationData._rss-distance.csv

‚úÖ XGB modeli ba≈üarƒ±yla y√ºklendi: regression_models/XGB_model.pkl
‚úÖ Veri ba≈üarƒ±yla y√ºklendi: 174416 satƒ±r, 10 s√ºtun
üîÆ Tahmin yapƒ±lƒ±yor...
‚úÖ Tahmin tamamlandƒ±! 174416 adet tahmin √ºretildi.

üìà Performans Metrikleri:
   MAE (Ortalama Mutlak Hata): 27.9444
   RMSE (K√∂k Ortalama Kare Hata): 46.1992

‚úÖ Sonu√ßlar kaydedildi: ./data/4/validationData_predictions.csv



üéâ T√ºm tahminler tamamlandƒ±!


# Create and move distance files

In [38]:
# Build the validation "WDE" distance file: the same id pairs as the
# coordinate-based distance file, with the distance column replaced by the
# model's estimate. The values are assigned by position, so both CSVs must
# have the same number of rows in the same order.
real=pd.read_csv("./data/4/validationData._rss@.csv")
est=pd.read_csv("./data/4/validationData_predictions.csv")
real["estimated_distance"]=est["estimated"].values
real.to_csv("./data/4/validationData_WDE.csv", index=False)

In [39]:
# Build the training "WDE" distance file: the same id pairs as the
# coordinate-based distance file, with the distance column replaced by the
# model's estimate. The values are assigned by position, so both CSVs must
# have the same number of rows in the same order.
real=pd.read_csv("./data/4/trainingData._rss@.csv")
est=pd.read_csv("./data/4/trainingData_predictions.csv")
real["estimated_distance"]=est["estimated"].values
real.to_csv("./data/4/trainingData_WDE.csv", index=False)

In [33]:
# Output folders, one per split (training / validation) and distance
# source (Geo = coordinate-based, WDE = model-estimated).
folders=["TrainingGeo", "TrainingWDE" , "ValidationGeo", "ValidationWDE"]

In [35]:
# Create the output folders. Folders that already exist are left as they
# are; a regular file occupying one of the names still raises, instead of
# being silently ignored and failing later during the copy step.
for folder_name in folders:
    os.makedirs(folder_name, exist_ok=True)

In [42]:
import shutil


# [source, destination] pairs. Each output folder receives the raw data
# CSV as data.csv and the matching distance file as data_distances.csv.
copies = [
    ['./data/4/trainingData.csv', "TrainingGeo/data.csv"],
    ['./data/4/trainingData._rss@.csv', "TrainingGeo/data_distances.csv"],
    ['./data/4/validationData.csv', "ValidationGeo/data.csv"],
    ['./data/4/validationData._rss@.csv', "ValidationGeo/data_distances.csv"],
    ['./data/4/trainingData.csv', "TrainingWDE/data.csv"],
    ['./data/4/trainingData_WDE.csv', "TrainingWDE/data_distances.csv"],
    ['./data/4/validationData.csv', "ValidationWDE/data.csv"],
    ['./data/4/validationData_WDE.csv', "ValidationWDE/data_distances.csv"],
]

# copy2 also carries over file metadata such as modification times.
for source, destination in copies:
    shutil.copy2(source, destination)
    print(source, destination)
    print(source, destination)

./data/4/trainingData.csv TrainingGeo/data.csv
./data/4/trainingData._rss@.csv TrainingGeo/data_distances.csv
./data/4/validationData.csv ValidationGeo/data.csv
./data/4/validationData._rss@.csv ValidationGeo/data_distances.csv
./data/4/trainingData.csv TrainingWDE/data.csv
./data/4/trainingData_WDE.csv TrainingWDE/data_distances.csv
./data/4/validationData.csv ValidationWDE/data.csv
./data/4/validationData_WDE.csv ValidationWDE/data_distances.csv
