<a href="https://colab.research.google.com/github/so-yeon-hwang/DataSciencePractice/blob/main/SocketProgramming.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import socket
import os
import urllib.parse

def generate_http_request(url):
    """Build the HTTP/1.0 GET request text for *url*.

    Args:
        url: Absolute URL such as ``http://host[:port]/path?query``.

    Returns:
        The request as a ``str``: request line, ``Host``, ``User-Agent`` and
        ``Connection: close`` headers, each terminated by CRLF and followed
        by the blank line that ends the header section.
    """
    parsed_url = urllib.parse.urlparse(url)

    # The request target must never be empty: "http://host" has an empty
    # path, which would otherwise produce the invalid line "GET  HTTP/1.0".
    target = parsed_url.path or "/"
    # urlparse splits ";params" and "?query" off the path; put them back so
    # the server receives the resource the caller actually asked for.
    if parsed_url.params:
        target += f";{parsed_url.params}"
    if parsed_url.query:
        target += f"?{parsed_url.query}"

    return (
        f"GET {target} HTTP/1.0\r\n"
        f"Host: {parsed_url.netloc}\r\n"
        "User-Agent: HW1/1.0\r\n"
        "Connection: close\r\n\r\n"
    )


def _parse_http_response(response):
    """Split a raw HTTP response into ``(status_code, headers, body)``.

    Args:
        response: The complete response as ``bytes``.

    Returns:
        A tuple of the integer status code, a dict mapping lower-cased header
        names (``bytes``) to their stripped values (``bytes``), and the body
        ``bytes`` that follow the first blank line.

    Raises:
        ValueError: If the header terminator is missing or the status line
            does not contain a numeric status code.
    """
    header_block, separator, body = response.partition(b'\r\n\r\n')
    if not separator:
        raise ValueError("response has no header terminator")

    status_line, *header_lines = header_block.split(b'\r\n')
    status_parts = status_line.split()
    if len(status_parts) < 2:
        raise ValueError("malformed status line")
    status_code = int(status_parts[1])  # ValueError when not numeric

    headers = {}
    for line in header_lines:
        name, colon, value = line.partition(b':')
        if colon:
            # Header names are case-insensitive, so normalise them.
            headers[name.strip().lower()] = value.strip()
    return status_code, headers, body


def download_file(url):
    """Download *url* over plain HTTP/1.0 and save the body to a local file.

    The file is written to the current working directory under the last
    component of the URL path. Progress and errors are reported with
    ``print``; nothing is returned and no network error is propagated.

    Args:
        url: Absolute ``http://`` URL of the resource to fetch.
    """
    parsed_url = urllib.parse.urlparse(url)  # parse the URL
    # Extract the file name. A path that is empty or ends in "/" has no
    # basename, and open('') would fail, so fall back to a fixed name.
    filename = os.path.basename(parsed_url.path) or "index.html"

    # Extract host name and port number. The urlparse accessors cope with
    # userinfo and validate the port, unlike splitting netloc on ':'.
    host = parsed_url.hostname
    try:
        port = parsed_url.port
    except ValueError:
        print(f"Invalid port in URL: {url}")
        return
    if port is None:
        port = 80
    if not host:
        print(f"Invalid URL: {url}")
        return

    # Build the HTTP request message.
    http_request = generate_http_request(url)

    try:
        # The with-block closes the socket on every exit path, including
        # errors and early returns.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as client_socket:
            client_socket.connect((host, port))

            # sendall() keeps sending until the whole request is written;
            # plain send() may transmit only part of it.
            client_socket.sendall(http_request.encode())

            # Read the response until the server closes the connection.
            chunks = []
            while True:
                data = client_socket.recv(4096)
                if not data:
                    break
                chunks.append(data)
    except socket.gaierror:
        # Host name could not be resolved.
        print(f"{host}: unknown host")
        print(f"cannot connect to server {host} {port}")
        return
    except OSError as e:
        # Refused/reset connections, unreachable network, etc.
        print(f"connection to {host} {port} failed: {e}")
        return

    # Split headers from body and read the status code.
    try:
        status_code, headers, body = _parse_http_response(b''.join(chunks))
    except ValueError:
        print("Invalid HTTP response from server")
        return

    # Anything other than 200 OK is reported by its status code.
    if status_code != 200:
        print(f"HTTP Response Status Code: {status_code}")
        return

    # Content-Length is optional when the connection is closed after the
    # body, so its absence (or a non-numeric value) must not be fatal.
    try:
        content_length = int(headers[b'content-length'])
    except (KeyError, ValueError):
        content_length = "unknown"

    # Save the response body to the file.
    with open(filename, 'wb') as f:
        f.write(body)

    print(http_request)  # echo the request that was sent (4 lines)

    print(f"Download Complete: {filename}, {len(body)}/{content_length}")

def main():
    """Run the interactive download prompt.

    Prints the student banner, then repeatedly reads a command:

    * ``q`` or ``quit`` ends the loop.
    * ``down <url>`` downloads ``<url>`` via ``download_file`` when its
      scheme is ``http``.
    * Anything else prints a usage hint.

    The loop also ends cleanly when standard input is closed.
    """
    print("Student ID : 20192754")
    print("Name : Soyeon Hwang ")

    while True:
        try:
            cmd = input("\n> ")
        except EOFError:
            # stdin was closed (Ctrl-D or exhausted pipe); leave the loop
            # instead of dying with a traceback.
            break

        tokens = cmd.split()

        # Check the quit command on the whitespace-normalised input so that
        # " quit " is not rejected as an invalid command.
        if len(tokens) == 1 and tokens[0] in ("q", "quit"):
            break

        if len(tokens) < 2 or tokens[0].lower() != "down":
            print("Invalid command. Please use the format: down <url>")
            continue

        url = tokens[1]
        parsed_url = urllib.parse.urlparse(url)

        # Reject every scheme other than plain http.
        if parsed_url.scheme != "http":
            print(f"Only support http, not {parsed_url.scheme}")
            continue

        # All checks passed: perform the download.
        download_file(url)

# Start the interactive prompt only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Student ID : 20192754
Name : Soyeon Hwang 

> down http://netapp.cs.kookmin.ac.kr/member/palladio.JPG
GET /member/palladio.JPG HTTP/1.0
Host: netapp.cs.kookmin.ac.kr
User-Agent: HW1/1.0
Connection: close


Download Complete: palladio.JPG, 142740/142740
