In [None]:
# NOTE(review): `s3` here is a module that exposes `extract`; a later cell
# rebinds the same name `s3` to a boto3 client. Re-running this cell after that
# one points `s3` back at this module and breaks code expecting the client.
import s3
# Presumably pulls lecture data stored under 'dev/Top_Lecture/'; the meaning of
# the 'SON' and 'ALL' arguments is not visible from this notebook -- TODO confirm.
lecture_bottom = s3.extract('dev/Top_Lecture/', 'SON', 'ALL')

### Input Video -> S3

In [2]:
import base64
import io
import json
import os
import tempfile
import time
from datetime import datetime

import boto3
import dash
import dash_core_components as dcc
import dash_html_components as html
import speech_recognition as sr
from dash.dependencies import Input, Output
from moviepy.editor import VideoFileClip
from pydub import AudioSegment

import settings

# AWS clients: the S3 and Transcribe clients are both built from the key pair
# stored under the '_s3' entry of the project settings.
_s3_settings = settings.DB_SETTINGS['_s3']
_aws_credentials = {
    'aws_access_key_id': _s3_settings['ACCESS_KEY_ID'],
    'aws_secret_access_key': _s3_settings['ACCESS_SECRET_KEY'],
}
s3 = boto3.client('s3', **_aws_credentials)
transcribe = boto3.client('transcribe', **_aws_credentials)
# Bucket that uploads in this notebook are written to.
bucket_name = _s3_settings['BUCKET_NAME']

# Instantiate the Dash application
app = dash.Dash(__name__)

# Page layout: a single-file drag-and-drop upload box ('upload-video') followed
# by an empty container ('output-upload') that the callback fills in.
app.layout = html.Div(children=[
    dcc.Upload(
        id='upload-video',
        multiple=False,
        children=html.Div(children=[
            'Drag and Drop or ',
            html.A('Select a Video'),
        ]),
        style=dict(
            width='100%',
            height='60px',
            lineHeight='60px',
            borderWidth='1px',
            borderStyle='dashed',
            borderRadius='5px',
            textAlign='center',
            margin='10px',
        ),
    ),
    html.Div(id='output-upload'),
])

# Define callback
def _transcribe_video_bytes(video_bytes, language="ko-KR"):
    """Extract the audio track of an in-memory video and transcribe it.

    The video and the extracted audio are written into a private temporary
    directory, so simultaneous uploads cannot overwrite each other's files and
    nothing is left on disk afterwards.

    Args:
        video_bytes: Raw bytes of the video file.
        language: Language code passed to ``Recognizer.recognize_google``.

    Returns:
        The transcript string, or ``None`` when the video has no audio track.

    Raises:
        sr.UnknownValueError, sr.RequestError: propagated unchanged from
            ``recognize_google`` so the caller can decide how to report them.
    """
    with tempfile.TemporaryDirectory() as work_dir:
        video_path = os.path.join(work_dir, 'local_video.mp4')
        audio_path = os.path.join(work_dir, 'extracted_audio.wav')

        # The bytes are already in memory; write them locally instead of
        # downloading the object that was just uploaded to S3.
        with open(video_path, 'wb') as video_file:
            video_file.write(video_bytes)

        video = VideoFileClip(video_path)
        try:
            if video.audio is None:
                return None
            video.audio.write_audiofile(audio_path)
        finally:
            # Always release the clip so its file handles do not leak and the
            # temporary directory can be removed.
            video.close()

        recognizer = sr.Recognizer()
        with sr.AudioFile(audio_path) as source:
            audio = recognizer.record(source)
        return recognizer.recognize_google(audio, language=language)


@app.callback(Output('output-upload', 'children'),
              Input('upload-video', 'contents'))
def upload_video(contents):
    """Store an uploaded video in S3, transcribe its audio and store the text.

    Args:
        contents: Value of the upload component's ``contents`` property, or
            ``None`` when nothing has been uploaded. It is split at the first
            comma and the part after it is base64-decoded.

    Returns:
        An ``html.Div`` describing the outcome, or ``None`` when ``contents``
        is ``None``.
    """
    if contents is None:
        return None

    # Only the first comma separates the header from the payload.
    _content_type, content_string = contents.split(',', 1)
    decoded = base64.b64decode(content_string)

    # Get current time and format it as a string
    path = 'user/video/'
    current_time = datetime.now().strftime("%Y%m%d%H%M%S")
    filename = f'{current_time[:8]}_{current_time[8:]}_user_video.mp4'
    s3.upload_fileobj(io.BytesIO(decoded), bucket_name, path + filename)

    # Extract the audio and transcribe it; report failures to the page instead
    # of letting the callback crash after the video has already been stored.
    try:
        text = _transcribe_video_bytes(decoded)
    except sr.UnknownValueError:
        return html.Div([
            'Video uploaded to S3, but no speech could be recognized in its audio'
        ])
    except sr.RequestError as exc:
        return html.Div([
            f'Video uploaded to S3, but the speech recognition request failed: {exc}'
        ])

    if text is None:
        return html.Div([
            'Video uploaded to S3, but it has no audio track to transcribe'
        ])

    # Save the transcript to a .txt file and upload it to S3
    transcript_file = io.BytesIO(text.encode('utf-8'))
    s3.upload_fileobj(transcript_file, bucket_name, f'user/transcript/{current_time}_transcript.txt')

    return html.Div([
        'Video and transcript successfully uploaded to S3'
    ])

# Run app
# Start the Dash server with debug=True, but only when this code is executed
# as the main module rather than imported.
if __name__ == '__main__':
    app.run_server(debug=True)

MoviePy - Writing audio in extracted_audio.wav


                                                        

MoviePy - Done.




### Video -> Audio -> Text 

In [None]:
from moviepy.editor import VideoFileClip
from pydub import AudioSegment
import speech_recognition as sr

# Extract audio from video
video = VideoFileClip("../Input_Video.mp4")
try:
    if video.audio is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("../Input_Video.mp4 has no audio track to transcribe")
    video.audio.write_audiofile("extracted_audio.wav")
finally:
    # Release the resources held by the clip even if extraction fails.
    video.close()

# Transcribe audio file into text
r = sr.Recognizer()
audio_file = sr.AudioFile("extracted_audio.wav")

with audio_file as source:
    # Read the whole WAV file into memory before sending it for recognition.
    audio = r.record(source)
    text = r.recognize_google(audio, language="ko-KR")

print(text)

### Text -> Preprocess

### Preprocess Text -> S3

### S3 -> Dash