In [None]:
!pip install transformers accelerate bitsandbytes flask pyngrok -q

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

# Load the tokenizer and a 4-bit quantized copy of the DeepSeek-Coder instruct
# checkpoint. `tokenizer` and `model` are used by the /complete endpoint.
print("Loading DeepSeek-Coder model...")
model_name = "deepseek-ai/deepseek-coder-6.7b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Passing `load_in_4bit=True` directly to `from_pretrained` is deprecated;
# quantization settings are supplied through a BitsAndBytesConfig instead.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",  # let accelerate decide where the weights are placed
    trust_remote_code=True
)
print("Model loaded successfully!")
print("Ready for code completion!\n")

In [None]:
from flask import Flask, request, jsonify

# Flask application object; the route decorators in this cell register their
# handlers on it, and the server cell starts it with `app.run(...)`.
app = Flask(__name__)

@app.route('/health', methods=['GET'])
def health():
  """Health check: always answers with the JSON body ``{"status": "okay"}``."""
  payload = {'status': 'okay'}
  return jsonify(payload)

@app.route('/complete', methods=['POST'])
def complete():
  """Return a single-line code completion for the posted ``prefix``.

  Request body (JSON object): ``{"prefix": str, "suffix": str}``.
  ``suffix`` is accepted so existing clients keep working, but it is not
  used by the generation below.

  Responses (JSON):
    200: ``{"completion": str, "success": True}``
    400: ``{"error": str, "success": False}`` when the body is not a JSON
         object or ``prefix`` is not a string.
    500: ``{"error": str, "success": False}`` when tokenization or
         generation raises.
  """
  try:
    # silent=True returns None instead of raising on a missing or malformed
    # JSON body, so bad requests get a clean 400 rather than a 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
      return jsonify({
        "error": "Request body must be a JSON object",
        "success": False
      }), 400

    prefix = data.get("prefix", "")
    if not isinstance(prefix, str):
      return jsonify({
        "error": "'prefix' must be a string",
        "success": False
      }), 400

    # Follow the model's actual placement (it is loaded with
    # device_map="auto") instead of hard-coding "cuda".
    inputs = tokenizer(prefix, return_tensors="pt").to(model.device)

    # Encode without special tokens: if the tokenizer prepends BOS, index 0
    # of a plain encode() is the BOS id, not the newline id. Taking the last
    # id also covers tokenizers that emit a leading space piece.
    newline_ids = tokenizer.encode('\n', add_special_tokens=False)
    stop_ids = [tokenizer.eos_token_id]
    if newline_ids:
      stop_ids.append(newline_ids[-1])

    outputs = model.generate(
        **inputs,
        max_new_tokens=30,
        temperature=0.2,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=stop_ids
    )

    # generate() returns prompt + continuation. Dropping the prompt tokens is
    # exact, whereas comparing decoded text against `prefix` breaks whenever
    # decoding does not reproduce the prefix character-for-character.
    prompt_length = inputs["input_ids"].shape[1]
    generated_ids = outputs[0][prompt_length:]
    completion = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    # Only the first line of the continuation is returned.
    completion = completion.split('\n')[0].strip()

    return jsonify({"completion": completion, "success": True})

  except Exception as e:
    # API boundary: report the failure to the client as JSON instead of
    # letting Flask render an HTML error page.
    return jsonify({
      "error": str(e),
      "success": False
    }), 500

In [None]:
import getpass
import os

from pyngrok import ngrok

# Keep the auth token out of the notebook: read it from the environment and
# fall back to an interactive prompt whose input is not echoed or saved.
ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass.getpass("ngrok auth token: ")
ngrok.set_auth_token(ngrok_auth_token)

# Tunnel to the port the Flask server cell listens on (5000).
public_url = ngrok.connect(5000)

print("Tunnel created!")
# connect() may return a tunnel object rather than a plain string; print its
# `public_url` attribute when present so the output is a usable URL.
print(f"API URL: {getattr(public_url, 'public_url', public_url)}")

In [None]:
from threading import Thread

def run_server():
    """Start the Flask server for ``app`` on 0.0.0.0:5000 with the reloader off.

    NOTE(review): the reloader is presumably disabled because this function
    is used as a thread target rather than run from the main thread — confirm.
    """
    server_options = {'host': '0.0.0.0', 'port': 5000, 'use_reloader': False}
    app.run(**server_options)

# Run the server on a daemon thread so this cell returns right away and the
# thread does not keep the process alive on its own.
server_thread = Thread(target=run_server)
server_thread.daemon = True
server_thread.start()

print(
    " Server running in background!",
    " Cell is free, server still running",
    sep="\n",
)