Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow using camera as input on mobile with gpt-4 vision #302

Merged
merged 1 commit into from
Feb 14, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 68 additions & 7 deletions src/components/PromptForm/MobilePromptForm.tsx
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
import { FormEvent, KeyboardEvent, useEffect, useState, type RefObject } from "react";
import { Box, chakra, Flex } from "@chakra-ui/react";
import { Box, chakra, Flex, Image, CloseButton } from "@chakra-ui/react";
import AutoResizingTextarea from "../AutoResizingTextarea";

import { useSettings } from "../../hooks/use-settings";
import OptionsButton from "../OptionsButton";
import MicIcon from "./MicIcon";
import { isTranscriptionSupported } from "../../lib/speech-recognition";
import { useModels } from "../../hooks/use-models";
import PromptSendButton from "./PromptSendButton";
import AudioStatus from "./AudioStatus";
import { useKeyDownHandler } from "../../hooks/use-key-down-handler";

type MobilePromptFormProps = {
forkUrl: string;
onSendClick: (prompt: string) => void;
onSendClick: (prompt: string, imageUrls: string[]) => void;
inputPromptRef: RefObject<HTMLTextAreaElement>;
isLoading: boolean;
previousMessage?: string;
Expand All @@ -28,11 +29,14 @@ function MobilePromptForm({
const [prompt, setPrompt] = useState("");
// Has the user started typing?
const [isDirty, setIsDirty] = useState(false);
const { settings } = useSettings();
const { models } = useModels();
const { settings, setSettings } = useSettings();
const [isRecording, setIsRecording] = useState(false);
const [isTranscribing, setIsTranscribing] = useState(false);
const [recordingSeconds, setRecordingSeconds] = useState(0);
const inputType = isRecording || isTranscribing ? "audio" : "text";
// Base64 images
const [inputImageUrls, setInputImageUrls] = useState<string[]>([]);

// If the user clears the prompt, allow up-arrow again
useEffect(() => {
Expand Down Expand Up @@ -67,12 +71,23 @@ function MobilePromptForm({
};
}, [isRecording, recordingSeconds]);

// When at least one image is attached, switch the selected model to the first
// available model that reports `supportsImages`, unless it is already selected.
useEffect(() => {
  // Nothing attached: leave the user's current model alone.
  if (inputImageUrls.length === 0) {
    return;
  }

  const visionModel = models.find((model) => model.supportsImages);
  // Only write settings when the model actually changes; `settings` is a
  // dependency of this effect, so an unconditional write would re-trigger it.
  if (visionModel && visionModel.name !== settings.model.name) {
    setSettings({ ...settings, model: visionModel });
  }
}, [inputImageUrls, models, settings, setSettings]);

/**
 * Handle prompt form submission.
 *
 * Prevents the default form submit, clears the text prompt and the attached
 * images, and forwards the trimmed prompt together with the images that were
 * attached at the time of submission to `onSendClick`.
 *
 * @param e - The form submit event.
 */
const handlePromptSubmit = (e: FormEvent) => {
  e.preventDefault();
  const textValue = prompt.trim();
  setPrompt("");
  // Resetting state here does not affect `inputImageUrls` in this closure, so
  // the call below still receives the images attached for this render.
  setInputImageUrls([]);
  // Send exactly once, using the two-argument (prompt, imageUrls) signature.
  onSendClick(textValue, inputImageUrls);
};

const handleMetaEnter = useKeyDownHandler<HTMLTextAreaElement>({
Expand Down Expand Up @@ -133,16 +148,62 @@ function MobilePromptForm({
setIsTranscribing(false);

// Use this transcript as our prompt
onSendClick(transcription);
onSendClick(transcription, inputImageUrls);
setInputImageUrls([]);
};

/**
 * Remove one attached image from the pending list.
 *
 * Uses a functional state update so the removal is applied to the latest
 * list rather than to the copy captured by this render's closure.
 *
 * @param index - Position of the image to remove within the attached images.
 */
const handleDeleteImage = (index: number) => {
  setInputImageUrls((prevImageUrls) =>
    // Keep every image except the one at the requested position.
    prevImageUrls.filter((_, currentIndex) => currentIndex !== index)
  );
};

return (
<Box flex={1} w="100%" h="100%" px={1} pt={2} pb={4}>
<chakra.form onSubmit={handlePromptSubmit} h="100%">
<Flex mt={2} pb={2} px={1} alignItems="end" gap={2}>
<OptionsButton forkUrl={forkUrl} variant="outline" iconOnly />
<OptionsButton
forkUrl={forkUrl}
variant="outline"
iconOnly
onFileSelected={(base64String) =>
setInputImageUrls((prevImageUrls) => [...prevImageUrls, base64String])
}
/>

<Box flex={1}>
<Flex flexWrap="wrap">
{inputImageUrls.map((imageUrl, index) => (
<Box
key={index}
position="relative"
height="70px"
display="flex"
alignItems="center"
m={2}
>
<Image
src={imageUrl}
alt={`Image# ${index}`}
style={{ height: "70px", objectFit: "cover" }}
cursor="pointer"
/>
<Box
key={`${index}-close`}
display="flex"
alignItems="center"
justifyContent="center"
top="0"
right="0"
backgroundColor="grey"
color="white"
height="70px"
>
<CloseButton onClick={() => handleDeleteImage(index)} />
</Box>
</Box>
))}
</Flex>
{inputType === "audio" ? (
<Box py={2} px={1}>
<AudioStatus
Expand Down
Loading