In [None]:
from geopy.distance import geodesic
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import numpy as np

# Directories holding the saved classifier checkpoints.
# NOTE: both paths live under /content/drive, so Google Drive has to be
# mounted before this cell is executed.
roberta_model_path = "/content/drive/MyDrive/to/save/roberta_model"
electra_model_path = "/content/drive/MyDrive/to/save/electra_model"

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Tokenizers are loaded by model identifier rather than from the saved
# checkpoint directories.
tokenizer_roberta = AutoTokenizer.from_pretrained("klue/roberta-large")
tokenizer_electra = AutoTokenizer.from_pretrained("monologg/koelectra-base-v3-discriminator")

# Restore the saved sequence-classification models.
model_roberta = AutoModelForSequenceClassification.from_pretrained(roberta_model_path)
model_electra = AutoModelForSequenceClassification.from_pretrained(electra_model_path)

def calculate_distance(point1, point2):
    """Return the geodesic distance between two points, in kilometres.

    Both arguments are handed to ``geodesic`` unchanged.
    """
    return geodesic(point1, point2).kilometers



def label_to_value(label):
    """Translate a sentiment label into its score contribution.

    0 (negative) -> -30, 1 (positive) -> 30, 2 (very negative) -> -50,
    3 (very positive) -> 50. Any other label yields 0.
    """
    score_by_label = ((0, -30), (1, 30), (2, -50), (3, 50))
    for known_label, value in score_by_label:
        if label == known_label:
            return value
    return 0

def logits_to_probs(logits):
    """Apply softmax along dimension 1 to turn logits into probabilities."""
    return torch.softmax(logits, dim=1)

def predict_with_ensemble_modified(texts, roberta_model, koelectra_model, tokenizer_roberta, tokenizer_koelectra, device,
                                   positive_threshold=0.6, negative_threshold=0.7):
    """Classify texts with the averaged RoBERTa + KoELECTRA probabilities.

    Each text is tokenized separately for the two models, both models are run
    without gradient tracking, and their softmax probabilities are averaged.
    The arg-max class is then promoted to a "very" class when the ensemble is
    confident enough:

    * label 1 (positive) with probability > ``positive_threshold`` -> 3 (very positive)
    * label 0 (negative) with probability > ``negative_threshold`` -> 2 (very negative)

    Args:
        texts: list of input strings.
        roberta_model, koelectra_model: sequence-classification models; they
            are moved to ``device`` and switched to eval mode on every call.
        tokenizer_roberta, tokenizer_koelectra: tokenizers matching the models.
        device: torch device used for the inputs and the models.
        positive_threshold: probability above which label 1 becomes 3.
        negative_threshold: probability above which label 0 becomes 2.

    Returns:
        A list with one plain ``int`` label per input text; an empty list when
        ``texts`` is an empty list or tuple.
    """
    # Nothing to classify: skip tokenization and inference for an empty batch.
    if isinstance(texts, (list, tuple)) and len(texts) == 0:
        return []

    tokenizer_options = dict(truncation=True, padding=True, max_length=128, return_tensors="pt")
    encodings_roberta = tokenizer_roberta(texts, **tokenizer_options)
    encodings_koelectra = tokenizer_koelectra(texts, **tokenizer_options)

    roberta_model.to(device)
    koelectra_model.to(device)

    roberta_model.eval()
    koelectra_model.eval()

    with torch.no_grad():
        # Keyword arguments keep the call independent of the positional order
        # of each model's forward() parameters.
        roberta_outputs = roberta_model(
            input_ids=encodings_roberta['input_ids'].to(device),
            attention_mask=encodings_roberta['attention_mask'].to(device),
        )
        koelectra_outputs = koelectra_model(
            input_ids=encodings_koelectra['input_ids'].to(device),
            attention_mask=encodings_koelectra['attention_mask'].to(device),
        )

        roberta_probs = logits_to_probs(roberta_outputs.logits).cpu().numpy()
        koelectra_probs = logits_to_probs(koelectra_outputs.logits).cpu().numpy()

    # Simple unweighted average of the two models' class probabilities.
    ensemble_probs = (roberta_probs + koelectra_probs) / 2

    final_labels = []
    for probs in ensemble_probs:
        # int() so every returned label is a plain Python int.
        pred_label = int(np.argmax(probs))
        if pred_label == 1 and probs[pred_label] > positive_threshold:
            pred_label = 3  # confident positive -> very positive
        elif pred_label == 0 and probs[pred_label] > negative_threshold:
            pred_label = 2  # confident negative -> very negative
        final_labels.append(pred_label)

    return final_labels

def calculate_score_for_person(person_info, model_roberta, model_electra, tokenizer_roberta, tokenizer_electra, device):
    """Compute the weighted recommendation score for one person.

    The score is the sum of four bucketed component scores (distance between
    the work site and the subway station, attendance rate, number of past
    jobs, sentiment of the review text), each multiplied by its weight, plus
    a flat gender term.

    Args:
        person_info: dict with the keys ``gender``, ``work_location_x``,
            ``work_location_y``, ``subway_station_x``, ``subway_station_y``,
            ``actual_work_days``, ``applied_work_days``, ``work_frequency``
            and ``texts_to_predict``.
        model_roberta, model_electra, tokenizer_roberta, tokenizer_electra,
        device: forwarded unchanged to ``predict_with_ensemble_modified``.

    Returns:
        The total score as a number.

    Raises:
        KeyError: if ``person_info`` lacks one of the keys listed above.
    """
    # Weight applied to each component score.
    weight_distance = 0.5
    weight_attendance = 0.5
    weight_work_frequency = 0.5
    weight_label_value = 0.5
    # Flat bonus added when gender is 'male'; any other value adds nothing.
    # NOTE(review): this term is added unweighted and favours one gender -
    # confirm that this is intended.
    gender_weight = 0.5 if person_info['gender'] == 'male' else 0

    # Coordinates of the work site and of the subway station.
    work_location = (person_info['work_location_x'], person_info['work_location_y'])
    subway_station = (person_info['subway_station_x'], person_info['subway_station_y'])

    # Distance between the work site and the station, in kilometres.
    distance_to_station = calculate_distance(work_location, subway_station)

    # Attendance rate in percent. With no applied days there is nothing to
    # measure, so the rate is treated as 0 instead of dividing by zero.
    actual_work_days = person_info['actual_work_days']
    applied_work_days = person_info['applied_work_days']
    if applied_work_days:
        attendance_rate = actual_work_days / applied_work_days * 100
    else:
        attendance_rate = 0

    # Attendance buckets. Anything below 90% earns nothing, so poor
    # attendance can never outrank perfect attendance.
    if attendance_rate >= 100:
        attendance_score = 30
    elif attendance_rate >= 90:
        attendance_score = 10
    else:
        attendance_score = 0

    # Work-history buckets.
    work_frequency = person_info['work_frequency']
    if work_frequency >= 15:
        work_frequency_score = 30
    elif work_frequency >= 10:
        work_frequency_score = 20
    elif work_frequency >= 5:
        work_frequency_score = 5
    else:
        work_frequency_score = 0

    # Distance buckets: the closer, the higher the score.
    if distance_to_station < 10:
        distance_score = 50
    elif distance_to_station < 30:
        distance_score = 40
    elif distance_to_station < 50:
        distance_score = 30
    elif distance_to_station < 100:
        distance_score = 20
    elif distance_to_station < 200:
        distance_score = 10
    else:
        distance_score = 0

    # Sentiment of the review text; the predictor expects a list of texts.
    texts_to_predict = [person_info['texts_to_predict']]
    final_labels = predict_with_ensemble_modified(texts_to_predict, model_roberta, model_electra, tokenizer_roberta, tokenizer_electra, device)
    print(final_labels)
    # Exactly one text is submitted, so one label is expected; fall back to a
    # neutral 0 if the predictor returns nothing.
    sentiment_score = label_to_value(final_labels[-1]) if final_labels else 0

    # Weighted sum of the component scores plus the gender term.
    total_score = (distance_score * weight_distance) + \
                  (attendance_score * weight_attendance) + \
                  (work_frequency_score * weight_work_frequency) + \
                  gender_weight + \
                  (sentiment_score * weight_label_value)

    return total_score

# Example records standing in for the per-person entries of the database.
people_info = [
    {
        'work_location_x': 37.5665,
        'work_location_y': 126.9780,
        'subway_station_x': 37.5599,
        'subway_station_y': 126.9436,
        'gender': 'male',
        'actual_work_days': 20,
        'applied_work_days': 20,
        'work_frequency': 20,
        'texts_to_predict': "일을 매우 성실하고 착하고 그냥 너무 사람이 좋음.",
    },
    {
        'work_location_x': 37.5665,
        'work_location_y': 126.9780,
        'subway_station_x': 37.5599,
        'subway_station_y': 126.9780,
        'gender': 'male',
        'actual_work_days': 10,
        'applied_work_days': 10,
        'work_frequency': 25,
        'texts_to_predict': "불성실하고 매우 필요없음 그냥 출근하지 않는게 나음",
    },
]

# Score each person in turn and print the total.
for person_info in people_info:
    score = calculate_score_for_person(
        person_info,
        model_roberta,
        model_electra,
        tokenizer_roberta,
        tokenizer_electra,
        device,
    )
    print("총점:", score)

In [1]:
# Mount Google Drive at /content/drive.
# NOTE: the model-loading cell at the top of this notebook reads its
# checkpoints from /content/drive/MyDrive, so this cell has to run first.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Switch the working directory to the project folder on the mounted Drive
# (the git commands in the next cell are run from here).
%cd /content/drive/MyDrive/Colab Notebooks/Github/capstone-2024-35

In [None]:
# Set the git identity used for the commit below.
# NOTE(review): the e-mail address is hard-coded and is saved with the notebook.
!git config --global user.email 'osoryo@naver.com'
!git config --global user.name 'ChamsolPark'

# Stage and commit the recommendation-model notebook.
!git add Recommendation_Model/recommendation_model.ipynb
!git commit -m "Add predict to Recommendation_Model"

# Pull first to avoid conflicts with the remote repository
!git pull origin master

# Push the changes
!git push origin master