<a href="https://colab.research.google.com/github/kuansheng-yao/Educational-Big-Data-Projects/blob/main/educational_big_data_project_ver1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the required Python libraries
!pip install gspread gradio openai

import pandas as pd
import gspread
import requests
from oauth2client.service_account import ServiceAccountCredentials
import gradio as gr

# Authenticate the current user with Google Colab.
# NOTE(review): presumably this is what makes google.auth.default() usable later in the notebook — confirm.
from google.colab import auth
auth.authenticate_user()

import os
import json
import openai
from openai import OpenAI

import googleapiclient.discovery
import google.auth
from google.auth.transport.requests import Request
from oauth2client.client import GoogleCredentials
from gspread import authorize

# Make sure Google credentials have already been loaded into the environment;
# the default credentials are then used to authorize the gspread client.
creds, _ = google.auth.default()
gc = authorize(creds)

# Open the Google Spreadsheets worksheet used as the record log: record_sheet
# (rows are appended to it by choose_invoice_id).
record_sheet_id = '1poMhLbeQSrMNnJgTW5zS1bi_hqf6PnHnEi_2aTJ8kRc'
record_sheet_name = 'record'
record_sheet = gc.open_by_key(record_sheet_id).worksheet(record_sheet_name)

# Look up an invoice by ID and log the matching row to the record sheet
def choose_invoice_id(df, invoice_id):
    """Return the first row of ``df`` whose 'Invoice ID' equals ``invoice_id``.

    On a match the row, extended with a 'Current Date' entry, is also appended
    to the module-level ``record_sheet`` worksheet. The caller's DataFrame is
    never modified.

    Args:
        df: DataFrame holding an 'Invoice ID' column and, for matching rows,
            the invoice columns selected below.
        invoice_id: Invoice ID to look for; it is compared against the
            column's values converted to text.

    Returns:
        ``(invoice_data, data, invoice_id)`` where ``invoice_data`` is the
        matched row as a dict and ``data`` is the same row as a flat list, or
        ``(None, None, invoice_id)`` when no row matches.

    Raises:
        KeyError: if 'Invoice ID' (or, on a match, any selected invoice
            column) is missing from ``df``.
    """
    invoice_columns = [
        'Invoice ID', 'Branch', 'City', 'Customer type', 'Gender',
        'Product line', 'Unit price', 'Quantity', 'Tax 5%', 'Total', 'Date',
        'Time', 'Payment', 'cogs', 'gross margin percentage', 'gross income',
        'Rating',
    ]

    # Compare IDs as text on a temporary Series instead of writing the
    # converted column back into ``df`` (which would mutate the caller's data).
    ids_as_text = df['Invoice ID'].astype(str)
    df_filtered = df[ids_as_text == invoice_id]

    if df_filtered.empty:
        return None, None, invoice_id

    first_match = df_filtered[invoice_columns].to_dict(orient='records')[0]
    # Missing values are written out as the literal text 'nan'.
    invoice_data = {
        key: ('nan' if pd.isna(value) else value)
        for key, value in first_match.items()
    }

    # Stamp the record with today's date.
    invoice_data['Current Date'] = pd.Timestamp('today').strftime('%Y-%m-%d')

    # Round-trip through a one-row DataFrame to get the values as a flat list,
    # then append that list as a new row of the record sheet.
    data = pd.DataFrame([invoice_data]).values.tolist()[0]
    record_sheet.append_row(data)

    return invoice_data, data, invoice_id

# Call the OpenAI API and answer a question about the supplied data
def call_openai(user_question, df):
    """Ask gpt-3.5-turbo ``user_question`` with the rows of ``df`` as context.

    The rows of ``df`` are serialized to JSON and sent as a system message.
    The API key is read from a text file at a fixed Google Drive path and is
    also exported as the ``OPENAI_API_KEY`` environment variable.
    NOTE(review): presumably Google Drive must be mounted at /content/drive
    before this is called — confirm.

    Args:
        user_question: The question text sent as the user message.
        df: DataFrame whose records are provided to the model as JSON.

    Returns:
        The model's reply text, or a string starting with "Error occurred: "
        if any step (serialization, reading the key, the API call) fails.
    """
    try:
        record_data = df.to_dict(orient='records')
        json_data = json.dumps(record_data)

        # Read the OpenAI API key from a file. Surrounding whitespace is
        # stripped because a trailing newline in the key file would otherwise
        # be sent as part of the key and break authentication.
        with open('/content/drive/My Drive/auto certificate/client/auto_cer_openai_apikey.txt', 'r', encoding='utf-8') as f:
            api_key = f.read().strip()

        os.environ["OPENAI_API_KEY"] = api_key

        # Initialize the OpenAI client
        client = openai.Client(api_key=os.environ["OPENAI_API_KEY"])

        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            # Build the message list: instructions, the data, then the question
            messages=[
                {"role": "system", "content": "You are a human resource secretary, please answer my questions based on information I provided."},
                {"role": "system", "content": "Please provide a precise answer based on the json_data file."},
                {"role": "system", "content": json_data},
                {"role": "user", "content": user_question},
            ],
        )
        answer = completion.choices[0].message.content
        print(answer)
        return answer
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"Error occurred: {e}"

# Main entry point used by the UI to answer a question about an uploaded CSV
def main_openai_response(user_question, file):
    """Load the uploaded CSV and forward the question to ``call_openai``.

    Args:
        user_question: The question text entered by the user.
        file: The uploaded file. May be an object exposing the path as
            ``.name``, a plain path string, or ``None`` when nothing was
            uploaded.

    Returns:
        The text returned by ``call_openai``, or a string starting with
        "Error occurred: " when no file was uploaded or the CSV cannot be read.
    """
    if file is None:
        return "Error occurred: no CSV file was uploaded."

    # Accept both file-like objects carrying ``.name`` and bare path strings.
    csv_path = getattr(file, "name", file)
    try:
        df = pd.read_csv(csv_path)
    except (OSError, ValueError) as e:
        # Missing/unreadable file or malformed CSV: report it as text, in the
        # same style call_openai uses for its own failures.
        return f"Error occurred: {e}"
    return call_openai(user_question, df)

# Build the user interface with Gradio.
# Components are created in the order they should appear inside the tab.
with gr.Blocks() as demo:
    with gr.Tab("Ask OpenAI"):
        file_input = gr.File(label="Upload CSV File")
        question_input = gr.Textbox(label="Enter your question")
        openai_output = gr.Textbox(label="AI Response")
        ask_openai_button = gr.Button("Ask")
        # Clicking the button passes (question, file) to main_openai_response
        # and shows its return value in the response textbox.
        ask_openai_button.click(fn=main_openai_response, inputs=[question_input, file_input], outputs=openai_output)

# Start the Gradio app
demo.launch()

Collecting gradio
  Downloading gradio-4.37.2-py3-none-any.whl (12.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.3/12.3 MB[0m [31m21.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting openai
  Downloading openai-1.35.10-py3-none-any.whl (328 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m328.3/328.3 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.111.0-py3-none-any.whl (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.0/92.0 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ffmpy (from gradio)
  Downloading ffmpy-0.3.2.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gradio-client==1.0.2 (from gradio)
  Downloading gradio_client-1.0.2-py3-none-any.whl (318 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m



In [None]:
import os
from getpass import getpass

# Prompt the user for a Hugging Face API key (getpass does not echo the input).
# Surrounding whitespace from a pasted key is removed.
api_key = getpass('Enter your Hugging Face API key: ').strip()

# Export the key as environment variables.
# HUGGINGFACE_API_KEY is kept for backward compatibility; HF_TOKEN is the
# variable the Hugging Face Hub client reads for authentication, so it is set
# as well whenever a non-empty key was entered.
os.environ['HUGGINGFACE_API_KEY'] = api_key
if api_key:
    os.environ['HF_TOKEN'] = api_key

Enter your Hugging Face API key: ··········


In [None]:
!pip install --upgrade transformers accelerate
import accelerate

Collecting transformers
  Downloading transformers-4.42.3-py3-none-any.whl (9.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.3/9.3 MB[0m [31m19.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.32.1-py3-none-any.whl (314 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m314.1/314.1 kB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.10.0->accelerate)
  Using cached nvi

In [None]:
# Install the required Python libraries
!pip install gspread gradio openai
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import pandas as pd
import gspread
import requests
from oauth2client.service_account import ServiceAccountCredentials
import gradio as gr

# Seed PyTorch's random number generator with a fixed value
torch.random.manual_seed(0)

# Hub identifier shared by the model and tokenizer loads below
_PHI3_CHECKPOINT = "microsoft/Phi-3-mini-4k-instruct"

# Load the causal language model onto the CUDA device
model = AutoModelForCausalLM.from_pretrained(
    _PHI3_CHECKPOINT,
    device_map="cuda",
    torch_dtype="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(_PHI3_CHECKPOINT)

# Text-generation pipeline wrapping the model and tokenizer
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Keyword arguments passed to every pipeline call: sampling is disabled,
# at most 500 new tokens are produced, and the prompt is not echoed back
generation_args = dict(
    max_new_tokens=500,
    return_full_text=False,
    temperature=0.0,
    do_sample=False,
)

def generate_response(user_question):
    """Send ``user_question`` to the text-generation pipeline as a single user turn.

    Args:
        user_question: The question text to put in the user message.

    Returns:
        The 'generated_text' field of the first result returned by ``pipe``.
    """
    user_turn = {"role": "user", "content": user_question}
    results = pipe([user_turn], **generation_args)
    first_result = results[0]
    return first_result['generated_text']

# Build the user interface with Gradio.
# Components are created in the order they should appear on the page.
with gr.Blocks() as demo:
    gr.Markdown("## Chat with the AI Model")
    user_input = gr.Textbox(label="Enter your question")
    response_output = gr.Textbox(label="Response")
    ask_button = gr.Button("Ask")

    # Clicking the button passes the question to generate_response and shows
    # its return value in the response textbox.
    ask_button.click(fn=generate_response, inputs=user_input, outputs=response_output)

# Start the Gradio app
demo.launch()

Collecting gradio
  Downloading gradio-4.37.2-py3-none-any.whl (12.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.3/12.3 MB[0m [31m33.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting openai
  Downloading openai-1.35.10-py3-none-any.whl (328 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m328.3/328.3 kB[0m [31m26.1 MB/s[0m eta [36m0:00:00[0m
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.111.0-py3-none-any.whl (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.0/92.0 kB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ffmpy (from gradio)
  Downloading ffmpy-0.3.2.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gradio-client==1.0.2 (from gradio)
  Downloading gradio_client-1.0.2-py3-none-any.whl (318 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

configuration_phi3.py:   0%|          | 0.00/11.2k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3-mini-4k-instruct:
- configuration_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_phi3.py:   0%|          | 0.00/73.2k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3-mini-4k-instruct:
- modeling_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json:   0%|          | 0.00/16.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.44k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.94M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/306 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/599 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://0b95e65eaa2542b8f3.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


